Commit e36025a2 authored by Nikolay Sivov's avatar Nikolay Sivov Committed by Alexandre Julliard

dwrite: Implement itemization by script id using Unicode.org data.

parent b5eec813
......@@ -9,4 +9,5 @@ C_SRCS = \
layout.c \
linebreak.c \
main.c \
opentype.c
opentype.c \
scripts.c
/* Unicode Script IDs */
/* generated from http://www.unicode.org/Public/7.0.0/ucd/Scripts.txt */
/* DO NOT EDIT!! */
enum unicode_script_id {
Script_Unknown = 0,
Script_Control = 1,
Script_Arabic = 2,
Script_Armenian = 3,
Script_Avestan = 4,
Script_Balinese = 5,
Script_Bamum = 6,
Script_Bassa = 7,
Script_Batak = 8,
Script_Bengali = 9,
Script_Bopomofo = 10,
Script_Brahmi = 11,
Script_Braille = 12,
Script_Buginese = 13,
Script_Buhid = 14,
Script_Canadian = 15,
Script_Carian = 16,
Script_Caucasian = 17,
Script_Chakma = 18,
Script_Cham = 19,
Script_Cherokee = 20,
Script_Coptic = 21,
Script_Cuneiform = 22,
Script_Cypriot = 23,
Script_Cyrillic = 24,
Script_Deseret = 25,
Script_Devanagari = 26,
Script_Duployan = 27,
Script_Egyptian = 28,
Script_Elbasan = 29,
Script_Ethiopic = 30,
Script_Georgian = 31,
Script_Glagolitic = 32,
Script_Gothic = 33,
Script_Grantha = 34,
Script_Greek = 35,
Script_Gujarati = 36,
Script_Gurmukhi = 37,
Script_Han = 38,
Script_Hangul = 39,
Script_Hanunoo = 40,
Script_Hebrew = 41,
Script_Hiragana = 42,
Script_Imperial = 43,
Script_Inscriptional = 44,
Script_Javanese = 45,
Script_Kaithi = 46,
Script_Kannada = 47,
Script_Katakana = 48,
Script_Kayah = 49,
Script_Kharoshthi = 50,
Script_Khmer = 51,
Script_Khojki = 52,
Script_Khudawadi = 53,
Script_Lao = 54,
Script_Latin = 55,
Script_Lepcha = 56,
Script_Limbu = 57,
Script_Linear = 58,
Script_Lisu = 59,
Script_Lycian = 60,
Script_Lydian = 61,
Script_Mahajani = 62,
Script_Malayalam = 63,
Script_Mandaic = 64,
Script_Manichaean = 65,
Script_Meetei = 66,
Script_Mende = 67,
Script_Meroitic = 68,
Script_Miao = 69,
Script_Modi = 70,
Script_Mongolian = 71,
Script_Mro = 72,
Script_Myanmar = 73,
Script_Nabataean = 74,
Script_New = 75,
Script_Nko = 76,
Script_Ogham = 77,
Script_Ol = 78,
Script_Old = 79,
Script_Oriya = 80,
Script_Osmanya = 81,
Script_Pahawh = 82,
Script_Palmyrene = 83,
Script_Pau = 84,
Script_Phags = 85,
Script_Phoenician = 86,
Script_Psalter = 87,
Script_Rejang = 88,
Script_Runic = 89,
Script_Samaritan = 90,
Script_Saurashtra = 91,
Script_Sharada = 92,
Script_Shavian = 93,
Script_Siddham = 94,
Script_Sinhala = 95,
Script_Sora = 96,
Script_Sundanese = 97,
Script_Syloti = 98,
Script_Syriac = 99,
Script_Tagalog = 100,
Script_Tagbanwa = 101,
Script_Tai = 102,
Script_Takri = 103,
Script_Tamil = 104,
Script_Telugu = 105,
Script_Thaana = 106,
Script_Thai = 107,
Script_Tibetan = 108,
Script_Tifinagh = 109,
Script_Tirhuta = 110,
Script_Ugaritic = 111,
Script_Vai = 112,
Script_Warang = 113,
Script_Yi = 114,
};
......@@ -1172,7 +1172,7 @@ sub get_lb_ranges()
sub dump_indic($)
{
my $filename = shift;
my @indic_table = ($indic_types{'Other'}) x 65536;;
my @indic_table = ($indic_types{'Other'}) x 65536;
my $INPUT = open_data_file( $UNIDATA, "IndicSyllabicCategory.txt" );
while (<$INPUT>)
......@@ -1252,7 +1252,7 @@ sub dump_indic($)
sub dump_linebreak($)
{
my $filename = shift;
my @break_table = ($break_types{'XX'}) x 65536;;
my @break_table = ($break_types{'XX'}) x 65536;
my $next_group = 0;
my $INPUT = open_data_file( $UNIDATA, "LineBreak.txt" );
......@@ -1294,6 +1294,118 @@ sub dump_linebreak($)
save_file($filename);
}
################################################################
# dump Script IDs table
sub dump_scripts($)
{
my $filename = shift;
my $header = $filename;
my @scripts_table = (0) x 65536; # 0 means unknown script
my $next_group = 0;
my %scripts;
my $i;
my $INPUT = open_data_file( $UNIDATA, "Scripts.txt" );
while (<$INPUT>)
{
my $type = "";
next if /^\#/; # skip comments
next if /^\s*$/; # skip empty lines
next if /\x1a/; # skip ^Z
if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z]+)\s*/)
{
$type = $2;
}
elsif (/^\s*([0-9a-fA-F]+)..\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z]+)\s*/)
{
$type = $3;
}
else
{
die "malformed line $_";
}
# ignore some scripts
if ($type eq "" || $type eq "Common" || $type eq "Inherited")
{
next;
}
$scripts{$type} = -1;
}
$i = 2;
foreach my $script (sort keys %scripts) {
$scripts{$script} = $i;
$i++;
}
# now fill a table
seek $INPUT, 0, 0;
while (<$INPUT>)
{
my $type = "";
next if /^\#/; # skip comments
next if /^\s*$/; # skip empty lines
next if /\x1a/; # skip ^Z
if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z]+)\s*/)
{
$type = $2;
if (defined $scripts{$type})
{
$scripts_table[hex $1] = $scripts{$type};
}
next;
}
elsif (/^\s*([0-9a-fA-F]+)..\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z]+)\s*/)
{
$type = $3;
if (defined $scripts{$type})
{
foreach my $i (hex $1 .. hex $2)
{
$scripts_table[$i] = $scripts{$type};
}
}
next;
}
}
close $INPUT;
$header = "$filename.h";
open OUTPUT,">$header.new" or die "Cannot create $header";
print "Building $header\n";
print OUTPUT "/* Unicode Script IDs */\n";
print OUTPUT "/* generated from $UNIDATA/Scripts.txt */\n";
print OUTPUT "/* DO NOT EDIT!! */\n\n";
# reserve Unknown and Control ids
print OUTPUT "enum unicode_script_id {\n";
print OUTPUT " Script_Unknown = 0,\n";
print OUTPUT " Script_Control = 1,\n";
foreach my $script (sort keys %scripts)
{
print OUTPUT " Script_$script = $scripts{$script},\n";
}
print OUTPUT "};\n";
close OUTPUT;
save_file($header);
$filename = "$filename.c";
open OUTPUT,">$filename.new" or die "Cannot create $header";
print "Building $filename\n";
print OUTPUT "/* Unicode Script IDs */\n";
print OUTPUT "/* generated from $UNIDATA/Scripts.txt */\n";
print OUTPUT "/* DO NOT EDIT!! */\n\n";
dump_two_level_mapping( "wine_scripts_table", @scripts_table);
close OUTPUT;
save_file($filename);
}
################################################################
# dump the BiDi mirroring table
......@@ -1333,7 +1445,7 @@ sub dump_mirroring($)
sub dump_bracket($)
{
my $filename = shift;
my @bracket_table = (0) x 65536;;
my @bracket_table = (0) x 65536;
my $INPUT = open_data_file( $UNIDATA, "BidiBrackets.txt" );
while (<$INPUT>)
......@@ -2275,6 +2387,7 @@ dump_bracket( "dlls/usp10/bracket.c" );
dump_shaping( "dlls/usp10/shaping.c" );
dump_linebreak( "dlls/usp10/linebreak.c" );
dump_linebreak( "dlls/dwrite/linebreak.c" );
dump_scripts( "dlls/dwrite/scripts" );
dump_indic( "dlls/usp10/indicsyllable.c" );
dump_intl_nls("loader/l_intl.nls");
dump_vertical( "dlls/gdi32/vertical.c" );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment