Commit 2531d40b authored by Aric Stewart, committed by Alexandre Julliard

usp10: Rewrite resolveExplicit for Unicode 6.3.

parent e50dc73b
...@@ -57,7 +57,7 @@ ...@@ -57,7 +57,7 @@
WINE_DEFAULT_DEBUG_CHANNEL(bidi); WINE_DEFAULT_DEBUG_CHANNEL(bidi);
#define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0) #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
#define MAX_LEVEL 61 #define MAX_DEPTH 125
/* HELPER FUNCTIONS AND DECLARATIONS */ /* HELPER FUNCTIONS AND DECLARATIONS */
...@@ -75,7 +75,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(bidi); ...@@ -75,7 +75,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(bidi);
enum directions enum directions
{ {
/* input types */ /* input types */
/* ON MUST be zero, code relies on ON = N = 0 */ /* ON MUST be zero, code relies on ON = NI = 0 */
ON = 0, /* Other Neutral */ ON = 0, /* Other Neutral */
L, /* Left Letter */ L, /* Left Letter */
R, /* Right Letter */ R, /* Right Letter */
...@@ -102,12 +102,53 @@ enum directions ...@@ -102,12 +102,53 @@ enum directions
LRE, LRE,
PDF, PDF,
LRI, /* Isolate formatting characters new with 6.3 */
RLI,
FSI,
PDI,
/* resolved types, also resolved directions */ /* resolved types, also resolved directions */
N = ON, /* alias, where ON, WS and S are treated the same */ NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
};
/* Printable name of each directional class.  dump_types() indexes this
 * table with the class value itself, so the entries have to appear in the
 * same order as the members of enum directions.  The longest names are
 * three characters, which with the terminating NUL fills the inner
 * dimension of 4 exactly. */
static const char debug_type[][4] =
{
"ON", /* Other Neutral */
"L", /* Left Letter */
"R", /* Right Letter */
"AN", /* Arabic Number */
"EN", /* European Number */
"AL", /* Arabic Letter (Right-to-left) */
"NSM", /* Non-spacing Mark */
"CS", /* Common Separator */
"ES", /* European Separator */
"ET", /* European Terminator (post/prefix e.g. $ and %) */
"BN", /* Boundary neutral (type of RLE etc after explicit levels) */
"S", /* Segment Separator (TAB); used only in L1 */
"WS", /* White space; used only in L1 */
"B", /* Paragraph Separator (aka PS) */
"RLO", /* these are used only in X1-X9 */
"RLE",
"LRO",
"LRE",
"PDF",
"LRI", /* Isolate formatting characters new with 6.3 */
"RLI",
"FSI",
"PDI",
}; };
/* HELPER FUNCTIONS */ /* HELPER FUNCTIONS */
static inline void dump_types(const char* header, WORD *types, int start, int end)
{
int i;
TRACE("%s:",header);
for (i = start; i< end; i++)
TRACE(" %s",debug_type[types[i]]);
TRACE("\n");
}
/* Convert the libwine information to the direction enum */ /* Convert the libwine information to the direction enum */
static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c) static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIPT_CONTROL *c)
{ {
...@@ -143,7 +184,7 @@ static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIP ...@@ -143,7 +184,7 @@ static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIP
switch (lpString[i]) switch (lpString[i])
{ {
case '-': case '-':
case '+': chartype[i] = N; break; case '+': chartype[i] = NI; break;
case '/': chartype[i] = CS; break; case '/': chartype[i] = CS; break;
} }
break; break;
...@@ -155,6 +196,10 @@ static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIP ...@@ -155,6 +196,10 @@ static void classify(LPCWSTR lpString, WORD *chartype, DWORD uCount, const SCRIP
case 0x202C: chartype[i] = PDF; break; case 0x202C: chartype[i] = PDF; break;
case 0x202D: chartype[i] = LRO; break; case 0x202D: chartype[i] = LRO; break;
case 0x202E: chartype[i] = RLO; break; case 0x202E: chartype[i] = RLO; break;
case 0x2066: chartype[i] = LRI; break;
case 0x2067: chartype[i] = RLI; break;
case 0x2068: chartype[i] = FSI; break;
case 0x2069: chartype[i] = PDI; break;
} }
break; break;
} }
...@@ -208,81 +253,204 @@ static WORD EmbeddingDirection(int level) ...@@ -208,81 +253,204 @@ static WORD EmbeddingDirection(int level)
the outermost call. The nesting counter counts the recursion the outermost call. The nesting counter counts the recursion
depth and not the embedding level. depth and not the embedding level.
------------------------------------------------------------------------*/ ------------------------------------------------------------------------*/
/* One entry of the directional status stack kept by resolveExplicit. */
typedef struct tagStackItem {
int level; /* embedding level in effect while this entry is on top */
int override; /* NI for no override, or L / R when pushed for LRO / RLO */
BOOL isolate; /* TRUE when pushed for RLI, LRI or FSI; FALSE for embeddings and overrides */
} StackItem;
static int resolveExplicit(int level, int dir, WORD *pcls, WORD *plevel, int cch, int nNest) #define push_stack(l,o,i) \
do { stack_top--; \
stack[stack_top].level = l; \
stack[stack_top].override = o; \
stack[stack_top].isolate = i;} while(0)
/* Drop the top entry of the status stack.  The stack grows towards index 0
 * (push_stack decrements stack_top), so popping moves stack_top back up.
 * Expects a local variable named stack_top to be in scope. */
#define pop_stack() do { ++stack_top; } while (0)
/* Non-zero when x may be pushed as a new embedding level: it must not exceed
 * MAX_DEPTH and no isolate or embedding overflow may be pending.  Expects the
 * locals overflow_isolate_count and overflow_embedding_count to be in scope.
 * The argument is parenthesized so that an expression such as (a | b) is
 * compared as a whole instead of being split by operator precedence. */
#define valid_level(x) ((x) <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, int count)
{ {
/* always called with a valid nesting level /* X1 */
nesting levels are != embedding levels */ int overflow_isolate_count = 0;
int nLastValid = nNest; int overflow_embedding_count = 0;
int ich = 0; int valid_isolate_count = 0;
int i;
/* check input values */ StackItem stack[MAX_DEPTH+2];
ASSERT(nNest >= 0 && level >= 0 && level <= MAX_LEVEL); int stack_top = MAX_DEPTH+1;
/* process the text */ stack[stack_top].level = level;
for (; ich < cch; ich++) stack[stack_top].override = NI;
stack[stack_top].isolate = FALSE;
for (i = 0; i < count; i++)
{ {
WORD cls = pcls[ich]; /* X2 */
switch (cls) if (pclass[i] == RLE)
{ {
case LRO: int least_odd = GreaterOdd(stack[stack_top].level);
case LRE: poutLevel[i] = stack[stack_top].level;
nNest++; if (valid_level(least_odd))
if (GreaterEven(level) <= MAX_LEVEL - (cls == LRO ? 2 : 0)) push_stack(least_odd, NI, FALSE);
else if (overflow_isolate_count == 0)
overflow_embedding_count++;
}
/* X3 */
else if (pclass[i] == LRE)
{
int least_even = GreaterEven(stack[stack_top].level);
poutLevel[i] = stack[stack_top].level;
if (valid_level(least_even))
push_stack(least_even, NI, FALSE);
else if (overflow_isolate_count == 0)
overflow_embedding_count++;
}
/* X4 */
else if (pclass[i] == RLO)
{
int least_odd = GreaterOdd(stack[stack_top].level);
poutLevel[i] = stack[stack_top].level;
if (valid_level(least_odd))
push_stack(least_odd, R, FALSE);
else if (overflow_isolate_count == 0)
overflow_embedding_count++;
}
/* X5 */
else if (pclass[i] == LRO)
{
int least_even = GreaterEven(stack[stack_top].level);
poutLevel[i] = stack[stack_top].level;
if (valid_level(least_even))
push_stack(least_even, L, FALSE);
else if (overflow_isolate_count == 0)
overflow_embedding_count++;
}
/* X5a */
else if (pclass[i] == RLI)
{
int least_odd = GreaterOdd(stack[stack_top].level);
poutLevel[i] = stack[stack_top].level;
if (valid_level(least_odd))
{ {
plevel[ich] = GreaterEven(level); valid_isolate_count++;
pcls[ich] = BN; push_stack(least_odd, NI, TRUE);
ich += resolveExplicit(plevel[ich], (cls == LRE ? N : L),
&pcls[ich+1], &plevel[ich+1],
cch - (ich+1), nNest);
nNest--;
continue;
} }
cls = pcls[ich] = BN; else
break; overflow_isolate_count++;
pclass[i] = NI;
case RLO: }
case RLE: /* X5b */
nNest++; else if (pclass[i] == LRI)
if (GreaterOdd(level) <= MAX_LEVEL - (cls == RLO ? 2 : 0)) {
int least_even = GreaterEven(stack[stack_top].level);
poutLevel[i] = stack[stack_top].level;
if (valid_level(least_even))
{ {
plevel[ich] = GreaterOdd(level); valid_isolate_count++;
pcls[ich] = BN; push_stack(least_even, NI, TRUE);
ich += resolveExplicit(plevel[ich], (cls == RLE ? N : R),
&pcls[ich+1], &plevel[ich+1],
cch - (ich+1), nNest);
nNest--;
continue;
} }
cls = pcls[ich] = BN; else
break; overflow_isolate_count++;
pclass[i] = NI;
}
/* X5c */
else if (pclass[i] == FSI)
{
int j;
int new_level = 0;
int skipping = 0;
poutLevel[i] = stack[stack_top].level;
for (j = i+1; j < count; j++)
{
if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
{
skipping++;
continue;
}
else if (pclass[j] == PDI)
{
if (skipping)
skipping --;
else
break;
continue;
}
case PDF: if (skipping) continue;
cls = pcls[ich] = BN;
if (nNest) if (pclass[j] == L)
{
new_level = 0;
break;
}
else if (pclass[j] == R || pclass[j] == AL)
{
new_level = 1;
break;
}
}
if (odd(new_level))
{ {
if (nLastValid < nNest) int least_odd = GreaterOdd(stack[stack_top].level);
if (valid_level(least_odd))
{ {
nNest--; valid_isolate_count++;
push_stack(least_odd, NI, TRUE);
} }
else else
overflow_isolate_count++;
}
else
{
int least_even = GreaterEven(stack[stack_top].level);
if (valid_level(least_even))
{ {
cch = ich; /* break the loop, but complete body */ valid_isolate_count++;
push_stack(least_even, NI, TRUE);
} }
else
overflow_isolate_count++;
} }
pclass[i] = NI;
} }
/* X6 */
/* Apply the override */ else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
if (dir != N)
{ {
cls = dir; poutLevel[i] = stack[stack_top].level;
if (stack[stack_top].override != NI)
pclass[i] = stack[stack_top].override;
} }
plevel[ich] = level; /* X6a */
if (pcls[ich] != BN) else if (pclass[i] == PDI)
pcls[ich] = cls; {
if (overflow_isolate_count) overflow_isolate_count--;
else if (!valid_isolate_count) {/* do nothing */}
else
{
overflow_embedding_count = 0;
while (!stack[stack_top].isolate) pop_stack();
pop_stack();
valid_isolate_count --;
}
poutLevel[i] = stack[stack_top].level;
pclass[i] = NI;
}
/* X7 */
else if (pclass[i] == PDF)
{
poutLevel[i] = stack[stack_top].level;
if (overflow_isolate_count) {/* do nothing */}
else if (overflow_embedding_count) overflow_embedding_count--;
else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
pop_stack();
}
/* X8: Nothing */
} }
/* X9: Based on 5.2 Retaining Explicit Formatting Characters */
return ich; for (i = 0; i < count ; i++)
if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
pclass[i] = BN;
} }
/* RESOLVE WEAK TYPES */ /* RESOLVE WEAK TYPES */
...@@ -318,7 +486,7 @@ enum states /* possible states */ ...@@ -318,7 +486,7 @@ enum states /* possible states */
static const int stateWeak[][10] = static const int stateWeak[][10] =
{ {
/* N, L, R, AN, EN, AL,NSM, CS, ES, ET */ /* NI, L, R, AN, EN, AL,NSM, CS, ES, ET */
/*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* Arabic letter */ /*xa*/ { ao, xl, xr, cn, cn, xa, xa, ao, ao, ao }, /* Arabic letter */
/*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter */ /*xr*/ { ro, xl, xr, ra, re, xa, xr, ro, ro, rt }, /* right letter */
/*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter */ /*xl*/ { lo, xl, xr, la, le, xa, xl, lo, lo, lt }, /* left letter */
...@@ -355,7 +523,7 @@ enum actions /* possible actions */ ...@@ -355,7 +523,7 @@ enum actions /* possible actions */
/* actions */ /* actions */
xxx = (XX << 4) + XX, /* no-op */ xxx = (XX << 4) + XX, /* no-op */
xIx = IX + xxx, /* increment run */ xIx = IX + xxx, /* increment run */
xxN = (XX << 4) + ON, /* set current to N */ xxN = (XX << 4) + ON, /* set current to NI */
xxE = (XX << 4) + EN, /* set current to EN */ xxE = (XX << 4) + EN, /* set current to EN */
xxA = (XX << 4) + AN, /* set current to AN */ xxA = (XX << 4) + AN, /* set current to AN */
xxR = (XX << 4) + R, /* set current to R */ xxR = (XX << 4) + R, /* set current to R */
...@@ -363,19 +531,19 @@ enum actions /* possible actions */ ...@@ -363,19 +531,19 @@ enum actions /* possible actions */
Nxx = (ON << 4) + 0xF, /* set run to neutral */ Nxx = (ON << 4) + 0xF, /* set run to neutral */
Axx = (AN << 4) + 0xF, /* set run to AN */ Axx = (AN << 4) + 0xF, /* set run to AN */
ExE = (EN << 4) + EN, /* set run to EN, set current to EN */ ExE = (EN << 4) + EN, /* set run to EN, set current to EN */
NIx = (ON << 4) + 0xF + IX, /* set run to N, increment */ NIx = (ON << 4) + 0xF + IX, /* set run to NI, increment */
NxN = (ON << 4) + ON, /* set run to N, set current to N */ NxN = (ON << 4) + ON, /* set run to NI, set current to NI */
NxR = (ON << 4) + R, /* set run to N, set current to R */ NxR = (ON << 4) + R, /* set run to NI, set current to R */
NxE = (ON << 4) + EN, /* set run to N, set current to EN */ NxE = (ON << 4) + EN, /* set run to NI, set current to EN */
AxA = (AN << 4) + AN, /* set run to AN, set current to AN */ AxA = (AN << 4) + AN, /* set run to AN, set current to AN */
NxL = (ON << 4) + L, /* set run to N, set current to L */ NxL = (ON << 4) + L, /* set run to NI, set current to L */
LxL = (L << 4) + L, /* set run to L, set current to L */ LxL = (L << 4) + L, /* set run to L, set current to L */
} ; } ;
static const int actionWeak[][10] = static const int actionWeak[][10] =
{ {
/* N, L, R, AN, EN, AL, NSM, CS, ES, ET */ /* NI, L, R, AN, EN, AL, NSM, CS, ES, ET */
/*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* Arabic letter */ /*xa*/ { xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN }, /* Arabic letter */
/*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right letter */ /*xr*/ { xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx }, /* right letter */
/*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter */ /*xl*/ { xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx }, /* left letter */
...@@ -425,7 +593,7 @@ static int GetResolvedType(int action) ...@@ -425,7 +593,7 @@ static int GetResolvedType(int action)
Input classes are of three kinds Input classes are of three kinds
- Static Input Token, where the class of the token remains - Static Input Token, where the class of the token remains
unchanged on output (AN, L, N, R) unchanged on output (AN, L, NI, R)
- Replaced Input Token, where the class of the token is - Replaced Input Token, where the class of the token is
always replaced on output (AL, BN, NSM, CS, ES, ET) always replaced on output (AL, BN, NSM, CS, ES, ET)
- Conditional Input Token, where the class of the token is - Conditional Input Token, where the class of the token is
...@@ -574,10 +742,10 @@ enum resolvestates ...@@ -574,10 +742,10 @@ enum resolvestates
/* new temporary class */ /* new temporary class */
r, /* R and characters resolved to R */ r, /* R and characters resolved to R */
l, /* L and characters resolved to L */ l, /* L and characters resolved to L */
rn, /* N preceded by right */ rn, /* NI preceded by right */
ln, /* N preceded by left */ ln, /* NI preceded by left */
a, /* AN preceded by left (the abbreviation 'la' is used up above) */ a, /* AN preceded by left (the abbreviation 'la' is used up above) */
na, /* N preceded by a */ na, /* NI preceded by a */
} ; } ;
...@@ -593,28 +761,28 @@ enum resolvestates ...@@ -593,28 +761,28 @@ enum resolvestates
static const int actionNeutrals[][5] = static const int actionNeutrals[][5] =
{ {
/* N, L, R, AN, EN = cls */ /* NI, L, R, AN, EN = cls */
{ In, 0, 0, 0, 0 }, /* r right */ { In, 0, 0, 0, 0 }, /* r right */
{ In, 0, 0, 0, L }, /* l left */ { In, 0, 0, 0, L }, /* l left */
{ In, En, Rn, Rn, Rn }, /* rn N preceded by right */ { In, En, Rn, Rn, Rn }, /* rn NI preceded by right */
{ In, Ln, En, En, LnL}, /* ln N preceded by left */ { In, Ln, En, En, LnL}, /* ln NI preceded by left */
{ In, 0, 0, 0, L }, /* a AN preceded by left */ { In, 0, 0, 0, L }, /* a AN preceded by left */
{ In, En, Rn, Rn, En }, /* na N preceded by a */ { In, En, Rn, Rn, En }, /* na NI preceded by a */
} ; } ;
static const int stateNeutrals[][5] = static const int stateNeutrals[][5] =
{ {
/* N, L, R, AN, EN */ /* NI, L, R, AN, EN */
{ rn, l, r, r, r }, /* r right */ { rn, l, r, r, r }, /* r right */
{ ln, l, r, a, l }, /* l left */ { ln, l, r, a, l }, /* l left */
{ rn, l, r, r, r }, /* rn N preceded by right */ { rn, l, r, r, r }, /* rn NI preceded by right */
{ ln, l, r, a, l }, /* ln N preceded by left */ { ln, l, r, a, l }, /* ln NI preceded by left */
{ na, l, r, a, l }, /* a AN preceded by left */ { na, l, r, a, l }, /* a AN preceded by left */
{ na, l, r, a, l }, /* na N preceded by la */ { na, l, r, a, l }, /* na NI preceded by la */
} ; } ;
/*------------------------------------------------------------------------ /*------------------------------------------------------------------------
...@@ -631,7 +799,7 @@ static const int stateNeutrals[][5] = ...@@ -631,7 +799,7 @@ static const int stateNeutrals[][5] =
In/Out: Array of directional classes In/Out: Array of directional classes
Note: On input only these directional classes are expected Note: On input only these directional classes are expected
R, L, N, AN, EN and BN R, L, NI, AN, EN and BN
W8 resolves a number of ENs to L W8 resolves a number of ENs to L
------------------------------------------------------------------------*/ ------------------------------------------------------------------------*/
...@@ -659,14 +827,14 @@ static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int c ...@@ -659,14 +827,14 @@ static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int c
continue; continue;
} }
ASSERT(pcls[ich] < 5); /* "Only N, L, R, AN, EN are allowed" */ ASSERT(pcls[ich] < 5); /* "Only NI, L, R, AN, EN are allowed" */
cls = pcls[ich]; cls = pcls[ich];
action = actionNeutrals[state][cls]; action = actionNeutrals[state][cls];
/* resolve the directionality for deferred runs */ /* resolve the directionality for deferred runs */
clsRun = GetDeferredNeutrals(action, level); clsRun = GetDeferredNeutrals(action, level);
if (clsRun != N) if (clsRun != NI)
{ {
SetDeferredRun(pcls, cchRun, ich, clsRun); SetDeferredRun(pcls, cchRun, ich, clsRun);
cchRun = 0; cchRun = 0;
...@@ -674,7 +842,7 @@ static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int c ...@@ -674,7 +842,7 @@ static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int c
/* resolve the directionality class at the current location */ /* resolve the directionality class at the current location */
clsNew = GetResolvedNeutrals(action); clsNew = GetResolvedNeutrals(action);
if (clsNew != N) if (clsNew != NI)
pcls[ich] = clsNew; pcls[ich] = clsNew;
if (In & action) if (In & action)
...@@ -689,7 +857,7 @@ static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int c ...@@ -689,7 +857,7 @@ static void resolveNeutrals(int baselevel, WORD *pcls, const WORD *plevel, int c
/* resolve the directionality for deferred runs */ /* resolve the directionality for deferred runs */
clsRun = GetDeferredNeutrals(actionNeutrals[state][cls], level); clsRun = GetDeferredNeutrals(actionNeutrals[state][cls], level);
if (clsRun != N) if (clsRun != NI)
SetDeferredRun(pcls, cchRun, ich, clsRun); SetDeferredRun(pcls, cchRun, ich, clsRun);
} }
...@@ -763,6 +931,7 @@ BOOL BIDI_DetermineLevels( ...@@ -763,6 +931,7 @@ BOOL BIDI_DetermineLevels(
baselevel = s->uBidiLevel; baselevel = s->uBidiLevel;
classify(lpString, chartype, uCount, c); classify(lpString, chartype, uCount, c);
if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
for (j = 0; j < uCount; ++j) for (j = 0; j < uCount; ++j)
switch(chartype[j]) switch(chartype[j])
...@@ -770,12 +939,13 @@ BOOL BIDI_DetermineLevels( ...@@ -770,12 +939,13 @@ BOOL BIDI_DetermineLevels(
case B: case B:
case S: case S:
case WS: case WS:
case ON: chartype[j] = N; case ON: chartype[j] = NI;
default: continue; default: continue;
} }
/* resolve explicit */ /* resolve explicit */
resolveExplicit(baselevel, N, chartype, lpOutLevels, uCount, 0); resolveExplicit(baselevel, chartype, lpOutLevels, uCount);
if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
/* resolve weak */ /* resolve weak */
resolveWeak(baselevel, chartype, lpOutLevels, uCount); resolveWeak(baselevel, chartype, lpOutLevels, uCount);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment