Commit f22ba056 authored by Alexandre Julliard

Added support for #pragma code_page.

Fixed a few bugs in language handling. Added error message when Ansi->Unicode conversion in the current codepage loses information.
parent b64583e5
......@@ -17,6 +17,7 @@
*/
LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_TRADITIONAL
#pragma code_page(936) /* FIXME: default for CHINESE_TRADITIONAL is 950 */
OPEN_FILE DIALOG LOADONCALL MOVEABLE DISCARDABLE 36, 24, 275, 134
STYLE DS_MODALFRAME | WS_POPUP | WS_CAPTION | WS_SYSMENU
......@@ -220,3 +221,5 @@ STRINGTABLE DISCARDABLE
IDS_REPORTVIEW "詳細資料"
IDS_TODESKTOP "顯示桌面"
}
#pragma code_page(default)
......@@ -17,6 +17,7 @@
*/
LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_TRADITIONAL
#pragma code_page(936) /* FIXME: default for CHINESE_TRADITIONAL is 950 */
SHELL_ABOUT_MSGBOX DIALOG LOADONCALL MOVEABLE DISCARDABLE 15, 40, 210, 152
STYLE DS_MODALFRAME | WS_POPUP | WS_VISIBLE | WS_CAPTION | WS_SYSMENU
......@@ -55,3 +56,5 @@ BEGIN
IDS_SHV_COLUMN6 "使用空間"
IDS_SHV_COLUMN7 "剩餘空間"
END
#pragma code_page(default)
......@@ -76,6 +76,7 @@ STRINGTABLE DISCARDABLE
}
LANGUAGE LANG_CHINESE, SUBLANG_CHINESE_TRADITIONAL
#pragma code_page(936) /* FIXME: default for CHINESE_TRADITIONAL is 950 */
SYSMENU MENU LOADONCALL MOVEABLE DISCARDABLE
{
......@@ -133,3 +134,5 @@ STRINGTABLE DISCARDABLE
{
MDI_IDS_MOREWINDOWS "更多視窗(&M)..." /* defined in mdi.h */
}
#pragma code_page(default)
......@@ -284,7 +284,7 @@ preprocessor
| tGCCLINE tNL /* The null-token */
| tERROR opt_text tNL { pperror("#error directive: '%s'", $2); if($2) free($2); }
| tWARNING opt_text tNL { ppwarning("#warning directive: '%s'", $2); if($2) free($2); }
| tPRAGMA opt_text tNL { if(pp_status.pedantic) ppwarning("#pragma ignored (arg: '%s')", $2); if($2) free($2); }
| tPRAGMA opt_text tNL { fprintf(ppout, "#pragma %s\n", $2 ? $2 : ""); if ($2) free($2); }
| tPPIDENT opt_text tNL { if(pp_status.pedantic) ppwarning("#ident ignored (arg: '%s')", $2); if($2) free($2); }
| tRCINCLUDE tRCINCLUDEPATH {
int nl=strlen($2) +3;
......
......@@ -82,6 +82,9 @@
%x pp_stripp_final
/* Set when scanning #line style directives */
%x pp_line
/* Set when scanning #pragma */
%x pp_pragma
%x pp_code_page
%option stack
%option never-interactive
......@@ -100,6 +103,7 @@ cident [a-zA-Z_][0-9a-zA-Z_]*
#include <ctype.h>
#include <assert.h>
#include "wine/unicode.h"
#include "wrc.h"
#include "utils.h"
#include "parser.h"
......@@ -115,7 +119,7 @@ cident [a-zA-Z_][0-9a-zA-Z_]*
#define YY_USER_ACTION char_number+=yyleng; wanted_id = want_id; want_id = 0;
static void addcchar(char c);
static void addwchar(short s);
static void addwchar(WCHAR s);
static string_t *get_buffered_cstring(void);
static string_t *get_buffered_wstring(void);
static string_t *make_string(char *s);
......@@ -123,13 +127,15 @@ static string_t *make_string(char *s);
static char *cbuffer; /* Buffers for string collection */
static int cbufidx;
static int cbufalloc = 0;
static short *wbuffer;
static WCHAR *wbuffer;
static int wbufidx;
static int wbufalloc = 0;
static int stripslevel = 0; /* Count {} during pp_strips/pp_stripe mode */
static int stripplevel = 0; /* Count () during pp_strips mode */
static int cjunk_tagline; /* Where did we start stripping (helps error tracking) */
static int current_codepage = -1; /* use language default */
/*
* This one is a bit tricky.
* We set 'want_id' in the parser to get the first
......@@ -316,6 +322,7 @@ static struct keyword *iskeyword(char *kw)
* because we only want to know the linenumber and
* filename.
*/
<INITIAL,pp_strips,pp_stripp>^{ws}*\#{ws}*pragma{ws}+ yy_push_state(pp_pragma);
<INITIAL,pp_strips,pp_stripp>^{ws}*\#{ws}* yy_push_state(pp_line);
<pp_line>[^\n]* {
int lineno;
......@@ -337,6 +344,23 @@ static struct keyword *iskeyword(char *kw)
input_name = xstrdup(fname);
}
<pp_pragma>code_page[^\n]* yyless(9); yy_pop_state(); yy_push_state(pp_code_page);
<pp_pragma>[^\n]* yy_pop_state(); if (pedantic) yywarning("Unrecognized #pragma directive '%s'",yytext);
<pp_code_page>\({ws}*default{ws}*\)[^\n]* current_codepage = -1; yy_pop_state();
<pp_code_page>\({ws}*[0-9]+{ws}*\)[^\n]* {
/*
 * Handles a code_page argument written as a parenthesised decimal
 * number; the rest of the line after the ')' is consumed as well.
 */
char *p = yytext;
yy_pop_state();
/* Skip the '(' and any whitespace; the pattern guarantees a digit follows. */
while (*p < '0' || *p > '9') p++;
current_codepage = strtol( p, NULL, 10 );
/* A value of 0 skips the lookup; any other value must be known to cp_get_table(). */
if (current_codepage && !cp_get_table( current_codepage ))
{
yyerror("Codepage %d not supported", current_codepage);
/* NOTE(review): presumably only reached if yyerror() returns - confirm. */
current_codepage = 0;
}
}
<pp_code_page>[^\n]* yy_pop_state(); yyerror("Malformed #pragma code_page directive");
/*
* Strip everything until a ';' taking
* into account braces {} for structures,
......@@ -418,16 +442,16 @@ L\" {
return tSTRING;
}
<yylstr>\\[0-7]{1,6} { /* octal escape sequence */
int result;
result = strtol(yytext+1, 0, 8);
unsigned int result;
result = strtoul(yytext+1, 0, 8);
if ( result > 0xffff )
yyerror("Character constant out of range");
addwchar((short)result);
addwchar((WCHAR)result);
}
<yylstr>\\x[0-9a-fA-F]{4} { /* hex escape sequence */
int result;
result = strtol(yytext+2, 0, 16);
addwchar((short)result);
unsigned int result;
result = strtoul(yytext+2, 0, 16);
addwchar((WCHAR)result);
}
<yylstr>\\x[0-9a-fA-F]{1,3} { yyerror("Invalid hex escape sequence '%s'", yytext); }
......@@ -441,8 +465,8 @@ L\" {
<yylstr>\\v addwchar('\v');
<yylstr>\\(\n|.) addwchar(yytext[1]);
<yylstr>\\\r\n addwchar(yytext[2]);
<yylstr>\"\" addcchar('\"'); /* "bla""bla" -> "bla\"bla" */
<yylstr>\\\"\" addcchar('\"'); /* "bla\""bla" -> "bla\"bla" */
<yylstr>\"\" addwchar('\"'); /* "bla""bla" -> "bla\"bla" */
<yylstr>\\\"\" addwchar('\"'); /* "bla\""bla" -> "bla\"bla" */
<yylstr>\"{ws}+\" ; /* "bla" "bla" -> "blabla" */
<yylstr>[^\\\n\"]+ {
char *yptr = yytext;
......@@ -596,7 +620,7 @@ static void addcchar(char c)
cbuffer[cbufidx++] = c;
}
static void addwchar(short s)
static void addwchar(WCHAR s)
{
if(wbufidx >= wbufalloc)
{
......@@ -605,27 +629,33 @@ static void addwchar(short s)
if(wbufalloc > 65536)
yywarning("Reallocating wide string buffer larger than 64kB");
}
/*
* BS 08-Aug-1999 FIXME: The '& 0xff' is probably a bug, but I have
* not experienced it yet and I seem to remember that this was for
* a reason. But, as so many things you tend to forget why.
* I guess that there were problems due to the sign extension of
* shorts WRT chars (e.g. 0x80 becomes 0xff80 instead of 0x0080).
* This should then be fixed in the lexer calling the function.
*/
wbuffer[wbufidx++] = (short)(s & 0xff);
wbuffer[wbufidx++] = s;
}
static string_t *get_buffered_cstring(void)
{
/*
 * NOTE(review): this text is a rendered diff whose +/- markers were lost.
 * The statements down to the first `return str;` appear to be the removed
 * version of the body, and everything after it the replacement - confirm
 * against the real file before editing.
 */
string_t *str = new_string();
str->size = cbufidx;
str->type = str_char;
str->str.cstr = (char *)xmalloc(cbufidx+1);
memcpy(str->str.cstr, cbuffer, cbufidx);
str->str.cstr[cbufidx] = '\0';
return str;
/*
 * Copy the cbufidx bytes collected in cbuffer into a new, NUL-terminated
 * str_char string.
 */
string_t *str = new_string();
str->size = cbufidx;
str->type = str_char;
str->str.cstr = (char *)xmalloc(cbufidx+1);
memcpy(str->str.cstr, cbuffer, cbufidx);
str->str.cstr[cbufidx] = '\0';
/*
 * The 8-bit string is kept as-is when current_codepage is 0 or -1 (the
 * "use language default" initial value), or when win32 is not set.
 */
if (!current_codepage || current_codepage == -1 || !win32) /* store as ANSI string */
{
/* Codepage 0 is reported as an error; the string is still returned. */
if (!current_codepage) yyerror("Codepage set to Unicode only, cannot use ASCII string here");
return str;
}
else /* convert to Unicode before storing */
{
/* Convert with the codepage selected by the last #pragma code_page. */
string_t *str_w = convert_string( str, str_unicode, current_codepage );
/* An error is raised when check_unicode_conversion() returns zero. */
if (!check_unicode_conversion( str, str_w, current_codepage ))
yyerror("String %s does not convert identically to Unicode and back in codepage %d. "
"Try using a Unicode string instead.", str->str.cstr, current_codepage );
/* The 8-bit copy is released; the caller receives the converted string. */
free_string( str );
return str_w;
}
}
static string_t *get_buffered_wstring(void)
......@@ -633,8 +663,8 @@ static string_t *get_buffered_wstring(void)
string_t *str = new_string();
str->size = wbufidx;
str->type = str_unicode;
str->str.wstr = (short *)xmalloc(2*(wbufidx+1));
memcpy(str->str.wstr, wbuffer, wbufidx);
str->str.wstr = xmalloc((wbufidx+1)*sizeof(WCHAR));
memcpy(str->str.wstr, wbuffer, wbufidx*sizeof(WCHAR));
str->str.wstr[wbufidx] = 0;
return str;
}
......
......@@ -531,6 +531,8 @@ resource
yywarning("LANGUAGE not supported in 16-bit mode");
if(currentlanguage)
free(currentlanguage);
if (get_language_codepage($3, $5) == -1)
yyerror( "Language %04x is not supported", ($5<<10) + $3);
currentlanguage = new_language($3, $5);
$$ = NULL;
chat("Got LANGUAGE %d,%d (0x%04x)", $3, $5, ($5<<10) + $3);
......@@ -1752,7 +1754,10 @@ opt_lvc : /* Empty */ { $$ = new_lvc(); }
* The conflict is now moved to the expression handling below.
*/
opt_language
: tLANGUAGE expr ',' expr { $$ = new_language($2, $4); }
: tLANGUAGE expr ',' expr { $$ = new_language($2, $4);
if (get_language_codepage($2, $4) == -1)
yyerror( "Language %04x is not supported", ($4<<10) + $2);
}
;
opt_characts
......
......@@ -37,16 +37,16 @@ char *xstrdup(const char *str);
int yyerror(const char *s, ...) __attribute__((format (printf, 1, 2)));
int yywarning(const char *s, ...) __attribute__((format (printf, 1, 2)));
void internal_error(const char *file, int line, const char *s, ...) __attribute__((format (printf, 3, 4)));
void internal_error(const char *file, int line, const char *s, ...) __attribute__((format (printf, 3, 4), noreturn));
void error(const char *s, ...) __attribute__((format (printf, 1, 2)));
void warning(const char *s, ...) __attribute__((format (printf, 1, 2)));
void chat(const char *s, ...) __attribute__((format (printf, 1, 2)));
char *dup_basename(const char *name, const char *ext);
char *dupwstr2cstr(const short *str);
short *dupcstr2wstr(const char *str);
int compare_name_id(name_id_t *n1, name_id_t *n2);
string_t *convert_string(const string_t *str, enum str_e type);
void set_language(unsigned short lang, unsigned short sublang);
string_t *convert_string(const string_t *str, enum str_e type, int codepage);
void free_string( string_t *str );
int check_unicode_conversion( const string_t *str_a, const string_t *str_w, int codepage );
int get_language_codepage( unsigned short lang, unsigned short sublang );
#endif
......@@ -363,6 +363,8 @@ int main(int argc,char *argv[])
{
int lan;
lan = strtol(optarg, NULL, 0);
if (get_language_codepage(PRIMARYLANGID(lan), SUBLANGID(lan)) == -1)
error("Language %04x is not supported",lan);
currentlanguage = new_language(PRIMARYLANGID(lan), SUBLANGID(lan));
}
break;
......
......@@ -99,7 +99,7 @@ typedef struct string {
enum str_e type;
union {
char *cstr;
short *wstr;
WCHAR *wstr;
} str;
} string_t;
......
......@@ -243,26 +243,22 @@ static void write_name_str(FILE *fp, name_id_t *nid)
else if(!win32 && nid->name.s_name->type == str_unicode)
{
name_id_t lnid;
string_t str;
lnid.type = name_str;
lnid.name.s_name = &str;
str.type = str_char;
str.str.cstr = dupwstr2cstr(nid->name.s_name->str.wstr);
lnid.name.s_name = convert_string( nid->name.s_name, str_char,
get_language_codepage(0,0) );
write_name_str(fp, &lnid);
free(str.str.cstr);
free_string( lnid.name.s_name );
}
else if(win32 && nid->name.s_name->type == str_char)
{
name_id_t lnid;
string_t str;
lnid.type = name_str;
lnid.name.s_name = &str;
str.type = str_unicode;
str.str.wstr = dupcstr2wstr(nid->name.s_name->str.cstr);
lnid.name.s_name = convert_string( nid->name.s_name, str_unicode,
get_language_codepage(0,0) );
write_name_str(fp, &lnid);
free(str.str.wstr);
free_string( lnid.name.s_name );
}
else if(win32 && nid->name.s_name->type == str_unicode)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment