Commit c38be910 authored by Nikolay Sivov, committed by Alexandre Julliard

dwrite: Improve script mapping generation.

parent 65feea4e
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -5,140 +5,129 @@ ...@@ -5,140 +5,129 @@
enum unicode_script_id { enum unicode_script_id {
Script_Unknown = 0, Script_Unknown = 0,
Script_Common = 1, Script_Common = 1,
Script_Adlam = 2, Script_Inherited = 2,
Script_Ahom = 3, Script_Arabic = 3,
Script_Anatolian_Hieroglyphs = 4, Script_Armenian = 4,
Script_Arabic = 5, Script_Avestan = 5,
Script_Armenian = 6, Script_Balinese = 6,
Script_Avestan = 7, Script_Bamum = 7,
Script_Balinese = 8, Script_Batak = 8,
Script_Bamum = 9, Script_Bengali = 9,
Script_Bassa_Vah = 10, Script_Bopomofo = 10,
Script_Batak = 11, Script_Brahmi = 11,
Script_Bengali = 12, Script_Braille = 12,
Script_Bhaiksuki = 13, Script_Buginese = 13,
Script_Bopomofo = 14, Script_Buhid = 14,
Script_Brahmi = 15, Script_Canadian_Aboriginal = 15,
Script_Braille = 16, Script_Carian = 16,
Script_Buginese = 17, Script_Cham = 17,
Script_Buhid = 18, Script_Cherokee = 18,
Script_Canadian_Aboriginal = 19, Script_Coptic = 19,
Script_Carian = 20, Script_Cuneiform = 20,
Script_Caucasian_Albanian = 21, Script_Cypriot = 21,
Script_Chakma = 22, Script_Cyrillic = 22,
Script_Cham = 23, Script_Deseret = 23,
Script_Cherokee = 24, Script_Devanagari = 24,
Script_Coptic = 25, Script_Egyptian_Hieroglyphs = 25,
Script_Cuneiform = 26, Script_Ethiopic = 26,
Script_Cypriot = 27, Script_Georgian = 27,
Script_Cyrillic = 28, Script_Glagolitic = 28,
Script_Deseret = 29, Script_Gothic = 29,
Script_Devanagari = 30, Script_Greek = 30,
Script_Duployan = 31, Script_Gujarati = 31,
Script_Egyptian_Hieroglyphs = 32, Script_Gurmukhi = 32,
Script_Elbasan = 33, Script_Han = 33,
Script_Ethiopic = 34, Script_Hangul = 34,
Script_Georgian = 35, Script_Hanunoo = 35,
Script_Glagolitic = 36, Script_Hebrew = 36,
Script_Gothic = 37, Script_Hiragana = 37,
Script_Grantha = 38, Script_Imperial_Aramaic = 38,
Script_Greek = 39, Script_Inscriptional_Pahlavi = 39,
Script_Gujarati = 40, Script_Inscriptional_Parthian = 40,
Script_Gurmukhi = 41, Script_Javanese = 41,
Script_Han = 42, Script_Kaithi = 42,
Script_Hangul = 43, Script_Kannada = 43,
Script_Hanunoo = 44, Script_Katakana = 44,
Script_Hatran = 45, Script_Kayah_Li = 45,
Script_Hebrew = 46, Script_Kharoshthi = 46,
Script_Hiragana = 47, Script_Khmer = 47,
Script_Imperial_Aramaic = 48, Script_Lao = 48,
Script_Inscriptional_Pahlavi = 49, Script_Latin = 49,
Script_Inscriptional_Parthian = 50, Script_Lepcha = 50,
Script_Javanese = 51, Script_Limbu = 51,
Script_Kaithi = 52, Script_Linear_B = 52,
Script_Kannada = 53, Script_Lisu = 53,
Script_Katakana = 54, Script_Lycian = 54,
Script_Kayah_Li = 55, Script_Lydian = 55,
Script_Kharoshthi = 56, Script_Malayalam = 56,
Script_Khmer = 57, Script_Mandaic = 57,
Script_Khojki = 58, Script_Meetei_Mayek = 58,
Script_Khudawadi = 59, Script_Mongolian = 59,
Script_Lao = 60, Script_Myanmar = 60,
Script_Latin = 61, Script_New_Tai_Lue = 61,
Script_Lepcha = 62, Script_Nko = 62,
Script_Limbu = 63, Script_Ogham = 63,
Script_Linear_A = 64, Script_Ol_Chiki = 64,
Script_Linear_B = 65, Script_Old_Italic = 65,
Script_Lisu = 66, Script_Old_Persian = 66,
Script_Lycian = 67, Script_Old_South_Arabian = 67,
Script_Lydian = 68, Script_Old_Turkic = 68,
Script_Mahajani = 69, Script_Oriya = 69,
Script_Malayalam = 70, Script_Osmanya = 70,
Script_Mandaic = 71, Script_Phags_Pa = 71,
Script_Manichaean = 72, Script_Phoenician = 72,
Script_Marchen = 73, Script_Rejang = 73,
Script_Meetei_Mayek = 74, Script_Runic = 74,
Script_Mende_Kikakui = 75, Script_Samaritan = 75,
Script_Meroitic_Cursive = 76, Script_Saurashtra = 76,
Script_Meroitic_Hieroglyphs = 77, Script_Shavian = 77,
Script_Miao = 78, Script_Sinhala = 78,
Script_Modi = 79, Script_Sundanese = 79,
Script_Mongolian = 80, Script_Syloti_Nagri = 80,
Script_Mro = 81, Script_Syriac = 81,
Script_Multani = 82, Script_Tagalog = 82,
Script_Myanmar = 83, Script_Tagbanwa = 83,
Script_Nabataean = 84, Script_Tai_Le = 84,
Script_New_Tai_Lue = 85, Script_Tai_Tham = 85,
Script_Newa = 86, Script_Tai_Viet = 86,
Script_Nko = 87, Script_Tamil = 87,
Script_Ogham = 88, Script_Telugu = 88,
Script_Ol_Chiki = 89, Script_Thaana = 89,
Script_Old_Hungarian = 90, Script_Thai = 90,
Script_Old_Italic = 91, Script_Tibetan = 91,
Script_Old_North_Arabian = 92, Script_Tifinagh = 92,
Script_Old_Permic = 93, Script_Ugaritic = 93,
Script_Old_Persian = 94, Script_Vai = 94,
Script_Old_South_Arabian = 95, Script_Yi = 95,
Script_Old_Turkic = 96, Script_Chakma = 96,
Script_Oriya = 97, Script_Meroitic_Cursive = 97,
Script_Osage = 98, Script_Meroitic_Hieroglyphs = 98,
Script_Osmanya = 99, Script_Miao = 99,
Script_Pahawh_Hmong = 100, Script_Sharada = 100,
Script_Palmyrene = 101, Script_Sora_Sompeng = 101,
Script_Pau_Cin_Hau = 102, Script_Takri = 102,
Script_Phags_Pa = 103, Script_Bassa_Vah = 103,
Script_Phoenician = 104, Script_Caucasian_Albanian = 104,
Script_Psalter_Pahlavi = 105, Script_Duployan = 105,
Script_Rejang = 106, Script_Elbasan = 106,
Script_Runic = 107, Script_Grantha = 107,
Script_Samaritan = 108, Script_Khojki = 108,
Script_Saurashtra = 109, Script_Khudawadi = 109,
Script_Sharada = 110, Script_Linear_A = 110,
Script_Shavian = 111, Script_Mahajani = 111,
Script_Siddham = 112, Script_Manichaean = 112,
Script_SignWriting = 113, Script_Mende_Kikakui = 113,
Script_Sinhala = 114, Script_Modi = 114,
Script_Sora_Sompeng = 115, Script_Mro = 115,
Script_Sundanese = 116, Script_Nabataean = 116,
Script_Syloti_Nagri = 117, Script_Old_North_Arabian = 117,
Script_Syriac = 118, Script_Old_Permic = 118,
Script_Tagalog = 119, Script_Pahawh_Hmong = 119,
Script_Tagbanwa = 120, Script_Palmyrene = 120,
Script_Tai_Le = 121, Script_Pau_Cin_Hau = 121,
Script_Tai_Tham = 122, Script_Psalter_Pahlavi = 122,
Script_Tai_Viet = 123, Script_Siddham = 123,
Script_Takri = 124, Script_Tirhuta = 124,
Script_Tamil = 125, Script_Warang_Citi = 125,
Script_Tangut = 126, Script_LastId = 125
Script_Telugu = 127,
Script_Thaana = 128,
Script_Thai = 129,
Script_Tibetan = 130,
Script_Tifinagh = 131,
Script_Tirhuta = 132,
Script_Ugaritic = 133,
Script_Vai = 134,
Script_Warang_Citi = 135,
Script_Yi = 136,
Script_LastId = 136
}; };
...@@ -1320,6 +1320,138 @@ sub dump_linebreak($) ...@@ -1320,6 +1320,138 @@ sub dump_linebreak($)
save_file($filename); save_file($filename);
} }
# Fixed table mapping script names to numeric script ids.
#
# dump_scripts() looks up the script name parsed from each Scripts.txt line in
# this hash, and prints every entry as "Script_<name> = <id>" in the generated
# enum, ordered by id.  Script_LastId is printed as (number of keys - 1), so
# the ids have to stay contiguous and start at 0 for that value to equal the
# highest id in the table.
#
# NOTE(review): the ids are spelled out instead of being derived from the
# sorted script names, presumably so that they do not shift when new scripts
# appear in Scripts.txt -- confirm before renumbering anything here.
my %scripts =
(
"Unknown" => 0,
"Common" => 1,
"Inherited" => 2,
"Arabic" => 3,
"Armenian" => 4,
"Avestan" => 5,
"Balinese" => 6,
"Bamum" => 7,
"Batak" => 8,
"Bengali" => 9,
"Bopomofo" => 10,
"Brahmi" => 11,
"Braille" => 12,
"Buginese" => 13,
"Buhid" => 14,
"Canadian_Aboriginal" => 15,
"Carian" => 16,
"Cham" => 17,
"Cherokee" => 18,
"Coptic" => 19,
"Cuneiform" => 20,
"Cypriot" => 21,
"Cyrillic" => 22,
"Deseret" => 23,
"Devanagari" => 24,
"Egyptian_Hieroglyphs" => 25,
"Ethiopic" => 26,
"Georgian" => 27,
"Glagolitic" => 28,
"Gothic" => 29,
"Greek" => 30,
"Gujarati" => 31,
"Gurmukhi" => 32,
"Han" => 33,
"Hangul" => 34,
"Hanunoo" => 35,
"Hebrew" => 36,
"Hiragana" => 37,
"Imperial_Aramaic" => 38,
"Inscriptional_Pahlavi" => 39,
"Inscriptional_Parthian" => 40,
"Javanese" => 41,
"Kaithi" => 42,
"Kannada" => 43,
"Katakana" => 44,
"Kayah_Li" => 45,
"Kharoshthi" => 46,
"Khmer" => 47,
"Lao" => 48,
"Latin" => 49,
"Lepcha" => 50,
"Limbu" => 51,
"Linear_B" => 52,
"Lisu" => 53,
"Lycian" => 54,
"Lydian" => 55,
"Malayalam" => 56,
"Mandaic" => 57,
"Meetei_Mayek" => 58,
"Mongolian" => 59,
"Myanmar" => 60,
"New_Tai_Lue" => 61,
"Nko" => 62,
"Ogham" => 63,
"Ol_Chiki" => 64,
"Old_Italic" => 65,
"Old_Persian" => 66,
"Old_South_Arabian" => 67,
"Old_Turkic" => 68,
"Oriya" => 69,
"Osmanya" => 70,
"Phags_Pa" => 71,
"Phoenician" => 72,
"Rejang" => 73,
"Runic" => 74,
"Samaritan" => 75,
"Saurashtra" => 76,
"Shavian" => 77,
"Sinhala" => 78,
"Sundanese" => 79,
"Syloti_Nagri" => 80,
"Syriac" => 81,
"Tagalog" => 82,
"Tagbanwa" => 83,
"Tai_Le" => 84,
"Tai_Tham" => 85,
"Tai_Viet" => 86,
"Tamil" => 87,
"Telugu" => 88,
"Thaana" => 89,
"Thai" => 90,
"Tibetan" => 91,
"Tifinagh" => 92,
"Ugaritic" => 93,
"Vai" => 94,
"Yi" => 95,
# Win8/Win8.1
# NOTE(review): presumably the ids that first appeared with these Windows
# releases; this group is appended after the alphabetical run above -- confirm.
"Chakma" => 96,
"Meroitic_Cursive" => 97,
"Meroitic_Hieroglyphs" => 98,
"Miao" => 99,
"Sharada" => 100,
"Sora_Sompeng" => 101,
"Takri" => 102,
# Win10
# NOTE(review): presumably the ids that first appeared with Windows 10 -- confirm.
"Bassa_Vah" => 103,
"Caucasian_Albanian" => 104,
"Duployan" => 105,
"Elbasan" => 106,
"Grantha" => 107,
"Khojki" => 108,
"Khudawadi" => 109,
"Linear_A" => 110,
"Mahajani" => 111,
"Manichaean" => 112,
"Mende_Kikakui" => 113,
"Modi" => 114,
"Mro" => 115,
"Nabataean" => 116,
"Old_North_Arabian" => 117,
"Old_Permic" => 118,
"Pahawh_Hmong" => 119,
"Palmyrene" => 120,
"Pau_Cin_Hau" => 121,
"Psalter_Pahlavi" => 122,
"Siddham" => 123,
"Tirhuta" => 124,
"Warang_Citi" => 125,
);
################################################################ ################################################################
# dump Script IDs table # dump Script IDs table
sub dump_scripts($) sub dump_scripts($)
...@@ -1328,10 +1460,11 @@ sub dump_scripts($) ...@@ -1328,10 +1460,11 @@ sub dump_scripts($)
my $header = $filename; my $header = $filename;
my @scripts_table; my @scripts_table;
my $script_index; my $script_index;
my %scripts;
my $i; my $i;
my $INPUT = open_data_file( $UNIDATA, "Scripts.txt" ); my $INPUT = open_data_file( $UNIDATA, "Scripts.txt" );
# Fill the table
# Unknown script id is always 0, so undefined scripts are automatically treated as such
while (<$INPUT>) while (<$INPUT>)
{ {
my $type = ""; my $type = "";
...@@ -1339,49 +1472,7 @@ sub dump_scripts($) ...@@ -1339,49 +1472,7 @@ sub dump_scripts($)
next if /^\#/; # skip comments next if /^\#/; # skip comments
next if /^\s*$/; # skip empty lines next if /^\s*$/; # skip empty lines
next if /\x1a/; # skip ^Z next if /\x1a/; # skip ^Z
if (/^\s*([0-9a-fA-F]+)\s*;\s*([_a-zA-Z]+)\s*/) if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
{
$type = $2;
}
elsif (/^\s*([0-9a-fA-F]+)..\s*([0-9a-fA-F]+)\s*;\s*([_a-zA-Z]+)\s*/)
{
$type = $3;
}
else
{
die "malformed line $_";
}
# ignore some scripts
if ($type eq "Common" || $type eq "Inherited")
{
next;
}
$scripts{$type} = -1;
}
# assign script indices, starting from index 2
$script_index = 1;
foreach my $script (sort keys %scripts) {
$scripts{$script} = ++$script_index;
}
# indices change when new scripts are added to the standard,
# keep Unknown/Common at fixed positions, Inherited is treated as Unknown
$scripts{"Unknown"} = 0;
$scripts{"Inherited"} = 0;
$scripts{"Common"} = 1;
# now fill a table
seek $INPUT, 0, 0;
while (<$INPUT>)
{
my $type = "";
next if /^\#/; # skip comments
next if /^\s*$/; # skip empty lines
next if /\x1a/; # skip ^Z
if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z]+)\s*/)
{ {
$type = $2; $type = $2;
if (defined $scripts{$type}) if (defined $scripts{$type})
...@@ -1390,7 +1481,7 @@ sub dump_scripts($) ...@@ -1390,7 +1481,7 @@ sub dump_scripts($)
} }
next; next;
} }
elsif (/^\s*([0-9a-fA-F]+)..\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z]+)\s*/) elsif (/^\s*([0-9a-fA-F]+)..\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
{ {
$type = $3; $type = $3;
if (defined $scripts{$type}) if (defined $scripts{$type})
...@@ -1413,14 +1504,12 @@ sub dump_scripts($) ...@@ -1413,14 +1504,12 @@ sub dump_scripts($)
print OUTPUT "/* generated from $UNIDATA/Scripts.txt */\n"; print OUTPUT "/* generated from $UNIDATA/Scripts.txt */\n";
print OUTPUT "/* DO NOT EDIT!! */\n\n"; print OUTPUT "/* DO NOT EDIT!! */\n\n";
# Inherited was consumed by Unknown, we don't need it as a separate enum member
delete $scripts{"Inherited"};
print OUTPUT "enum unicode_script_id {\n"; print OUTPUT "enum unicode_script_id {\n";
foreach my $script (sort { $scripts{$a} <=> $scripts{$b} } keys %scripts) foreach my $script (sort { $scripts{$a} <=> $scripts{$b} } keys %scripts)
{ {
print OUTPUT " Script_$script = $scripts{$script},\n"; print OUTPUT " Script_$script = $scripts{$script},\n";
} }
print OUTPUT " Script_LastId = $script_index\n"; print OUTPUT " Script_LastId = ", (scalar keys %scripts) - 1, "\n";
print OUTPUT "};\n"; print OUTPUT "};\n";
close OUTPUT; close OUTPUT;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment