Commit 7ab6c22f authored by Stefan Dösinger, committed by Alexandre Julliard

wined3d: Support the full amount of constants in GLSL.

This patch advertises all GL-reported float uniforms to the D3D app in order to reach the 256 constants required by Shader Model 3.0 on dx9 cards. If the shader does not use indirect addressing, all 256 constants are declared, and the compiler can then figure out which constants are actually used. This allows shaders that use high constant indices, but do not use all 256 constants, to work.
parent 9f1731ed
...@@ -361,6 +361,7 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m ...@@ -361,6 +361,7 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m
unsigned int cur_loop_depth = 0, max_loop_depth = 0; unsigned int cur_loop_depth = 0, max_loop_depth = 0;
const DWORD* pToken = byte_code; const DWORD* pToken = byte_code;
char pshader; char pshader;
unsigned int intconst = 0, boolconst = 0;
/* There are some minor differences between pixel and vertex shaders */ /* There are some minor differences between pixel and vertex shaders */
...@@ -490,13 +491,21 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m ...@@ -490,13 +491,21 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m
else if (ins.handler_idx == WINED3DSIH_LOOP else if (ins.handler_idx == WINED3DSIH_LOOP
|| ins.handler_idx == WINED3DSIH_REP) || ins.handler_idx == WINED3DSIH_REP)
{ {
DWORD reg;
if(ins.handler_idx == WINED3DSIH_LOOP) {
reg = pToken[1];
} else {
reg = pToken[0];
}
cur_loop_depth++; cur_loop_depth++;
if(cur_loop_depth > max_loop_depth) if(cur_loop_depth > max_loop_depth)
max_loop_depth = cur_loop_depth; max_loop_depth = cur_loop_depth;
pToken += param_size; pToken += param_size;
/* Rep and Loop always use an integer constant for the control parameters */ /* Rep and Loop always use an integer constant for the control parameters */
This->baseShader.uses_int_consts = TRUE; intconst |= (1 << (reg & WINED3DSP_REGNUM_MASK));
} }
else if (ins.handler_idx == WINED3DSIH_ENDLOOP else if (ins.handler_idx == WINED3DSIH_ENDLOOP
|| ins.handler_idx == WINED3DSIH_ENDREP) || ins.handler_idx == WINED3DSIH_ENDREP)
...@@ -628,10 +637,10 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m ...@@ -628,10 +637,10 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m
} }
} }
else if(WINED3DSPR_CONSTINT == regtype) { else if(WINED3DSPR_CONSTINT == regtype) {
This->baseShader.uses_int_consts = TRUE; intconst |= (1 << reg);
} }
else if(WINED3DSPR_CONSTBOOL == regtype) { else if(WINED3DSPR_CONSTBOOL == regtype) {
This->baseShader.uses_bool_consts = TRUE; boolconst |= (1 << reg);
} }
/* WINED3DSPR_TEXCRDOUT is the same as WINED3DSPR_OUTPUT. _OUTPUT can be > MAX_REG_TEXCRD and is used /* WINED3DSPR_TEXCRDOUT is the same as WINED3DSPR_OUTPUT. _OUTPUT can be > MAX_REG_TEXCRD and is used
...@@ -650,6 +659,8 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m ...@@ -650,6 +659,8 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, struct shader_reg_m
reg_maps->loop_depth = max_loop_depth; reg_maps->loop_depth = max_loop_depth;
This->baseShader.functionLength = ((char *)pToken - (char *)byte_code); This->baseShader.functionLength = ((char *)pToken - (char *)byte_code);
This->baseShader.num_bool_consts = count_bits(boolconst);
This->baseShader.num_int_consts = count_bits(intconst);
return WINED3D_OK; return WINED3D_OK;
} }
......
...@@ -558,13 +558,13 @@ static void shader_glsl_load_constants( ...@@ -558,13 +558,13 @@ static void shader_glsl_load_constants(
prog->vuniformF_locations, &priv->vconst_heap, priv->stack, constant_version); prog->vuniformF_locations, &priv->vconst_heap, priv->stack, constant_version);
/* Load DirectX 9 integer constants/uniforms for vertex shader */ /* Load DirectX 9 integer constants/uniforms for vertex shader */
if(vshader->baseShader.uses_int_consts) { if(vshader->baseShader.num_int_consts) {
shader_glsl_load_constantsI(vshader, gl_info, prog->vuniformI_locations, shader_glsl_load_constantsI(vshader, gl_info, prog->vuniformI_locations,
stateBlock->vertexShaderConstantI, stateBlock->changed.vertexShaderConstantsI); stateBlock->vertexShaderConstantI, stateBlock->changed.vertexShaderConstantsI);
} }
/* Load DirectX 9 boolean constants/uniforms for vertex shader */ /* Load DirectX 9 boolean constants/uniforms for vertex shader */
if(vshader->baseShader.uses_bool_consts) { if(vshader->baseShader.num_bool_consts) {
shader_glsl_load_constantsB(vshader, gl_info, programId, shader_glsl_load_constantsB(vshader, gl_info, programId,
stateBlock->vertexShaderConstantB, stateBlock->changed.vertexShaderConstantsB); stateBlock->vertexShaderConstantB, stateBlock->changed.vertexShaderConstantsB);
} }
...@@ -583,13 +583,13 @@ static void shader_glsl_load_constants( ...@@ -583,13 +583,13 @@ static void shader_glsl_load_constants(
prog->puniformF_locations, &priv->pconst_heap, priv->stack, constant_version); prog->puniformF_locations, &priv->pconst_heap, priv->stack, constant_version);
/* Load DirectX 9 integer constants/uniforms for pixel shader */ /* Load DirectX 9 integer constants/uniforms for pixel shader */
if(pshader->baseShader.uses_int_consts) { if(pshader->baseShader.num_int_consts) {
shader_glsl_load_constantsI(pshader, gl_info, prog->puniformI_locations, shader_glsl_load_constantsI(pshader, gl_info, prog->puniformI_locations,
stateBlock->pixelShaderConstantI, stateBlock->changed.pixelShaderConstantsI); stateBlock->pixelShaderConstantI, stateBlock->changed.pixelShaderConstantsI);
} }
/* Load DirectX 9 boolean constants/uniforms for pixel shader */ /* Load DirectX 9 boolean constants/uniforms for pixel shader */
if(pshader->baseShader.uses_bool_consts) { if(pshader->baseShader.num_bool_consts) {
shader_glsl_load_constantsB(pshader, gl_info, programId, shader_glsl_load_constantsB(pshader, gl_info, programId,
stateBlock->pixelShaderConstantB, stateBlock->changed.pixelShaderConstantsB); stateBlock->pixelShaderConstantB, stateBlock->changed.pixelShaderConstantsB);
} }
...@@ -724,21 +724,53 @@ static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const s ...@@ -724,21 +724,53 @@ static void shader_generate_glsl_declarations(IWineD3DBaseShader *iface, const s
/* Declare the constants (aka uniforms) */ /* Declare the constants (aka uniforms) */
if (This->baseShader.limits.constant_float > 0) { if (This->baseShader.limits.constant_float > 0) {
unsigned max_constantsF; unsigned max_constantsF;
/* Unless the shader uses indirect addressing, always declare the maximum array size and ignore that we need some
* uniforms privately. E.g. if GL supports 256 uniforms, and we need 2 for the pos fixup and immediate values, still
* declare VC[256]. If the shader needs more uniforms than we have it won't work in any case. If it uses less, the
* compiler will figure out which uniforms are really used and strip them out. This allows a shader to use c255 on
* a dx9 card, as long as it doesn't also use all the other constants.
*
* If the shader uses indirect addressing the compiler must assume that all declared uniforms are used. In this case,
* declare only the amount that we're assured to have.
*
* Thus we run into problems in these two cases:
* 1) The shader really uses more uniforms than supported
* 2) The shader uses indirect addressing, less constants than supported, but uses a constant index > #supported consts
*/
if(pshader) { if(pshader) {
max_constantsF = GL_LIMITS(pshader_constantsF) - (MAX_CONST_B / 4) - MAX_CONST_I - 2; /* No indirect addressing here */
max_constantsF = min(This->baseShader.limits.constant_float, max_constantsF); max_constantsF = GL_LIMITS(pshader_constantsF);
} else { } else {
/* Subtract the other potential uniforms from the max available (bools, ints, and 1 row of projection matrix) */ if(This->baseShader.reg_maps.usesrelconstF) {
max_constantsF = GL_LIMITS(vshader_constantsF) - (MAX_CONST_B / 4) - MAX_CONST_I - 1; /* Subtract the other potential uniforms from the max available (bools, ints, and 1 row of projection matrix).
max_constantsF = min(This->baseShader.limits.constant_float, max_constantsF); * Subtract another uniform for immediate values, which have to be loaded via uniform by the driver as well.
* The shader code only uses 0.5, 2.0, 1.0, 128 and -128 in vertex shader code, so one vec4 should be enough
* (Unfortunately the Nvidia driver doesn't store 128 and -128 in one float
*/
max_constantsF = GL_LIMITS(vshader_constantsF) - 3;
max_constantsF -= This->baseShader.num_int_consts;
/* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly,
* so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but
* for now take this into account when calculating the number of available constants
*/
max_constantsF -= This->baseShader.num_bool_consts;
/* Set by driver quirks in directx.c */
max_constantsF -= GLINFO_LOCATION.reserved_glsl_constants;
} else {
max_constantsF = GL_LIMITS(vshader_constantsF);
}
} }
max_constantsF = min(This->baseShader.limits.constant_float, max_constantsF);
shader_addline(buffer, "uniform vec4 %cC[%u];\n", prefix, max_constantsF); shader_addline(buffer, "uniform vec4 %cC[%u];\n", prefix, max_constantsF);
} }
if (This->baseShader.limits.constant_int > 0) /* Always declare the full set of constants, the compiler can remove the unused ones because d3d doesn't(yet)
* support indirect int and bool constant addressing. This avoids problems if the app uses e.g. i0 and i9.
*/
if (This->baseShader.limits.constant_int > 0 && This->baseShader.num_int_consts)
shader_addline(buffer, "uniform ivec4 %cI[%u];\n", prefix, This->baseShader.limits.constant_int); shader_addline(buffer, "uniform ivec4 %cI[%u];\n", prefix, This->baseShader.limits.constant_int);
if (This->baseShader.limits.constant_bool > 0) if (This->baseShader.limits.constant_bool > 0 && This->baseShader.num_bool_consts)
shader_addline(buffer, "uniform bool %cB[%u];\n", prefix, This->baseShader.limits.constant_bool); shader_addline(buffer, "uniform bool %cB[%u];\n", prefix, This->baseShader.limits.constant_bool);
if(!pshader) { if(!pshader) {
...@@ -4117,8 +4149,7 @@ static void shader_glsl_get_caps(WINED3DDEVTYPE devtype, const WineD3D_GL_Info * ...@@ -4117,8 +4149,7 @@ static void shader_glsl_get_caps(WINED3DDEVTYPE devtype, const WineD3D_GL_Info *
else else
pCaps->VertexShaderVersion = WINED3DVS_VERSION(3,0); pCaps->VertexShaderVersion = WINED3DVS_VERSION(3,0);
TRACE_(d3d_caps)("Hardware vertex shader version %d.%d enabled (GLSL)\n", (pCaps->VertexShaderVersion >> 8) & 0xff, pCaps->VertexShaderVersion & 0xff); TRACE_(d3d_caps)("Hardware vertex shader version %d.%d enabled (GLSL)\n", (pCaps->VertexShaderVersion >> 8) & 0xff, pCaps->VertexShaderVersion & 0xff);
/* Subtract the other potential uniforms from the max available (bools, ints, and 1 row of projection matrix) */ pCaps->MaxVertexShaderConst = GL_LIMITS(vshader_constantsF);
pCaps->MaxVertexShaderConst = GL_LIMITS(vshader_constantsF) - (MAX_CONST_B / 4) - MAX_CONST_I - 1;
/* Older DX9-class videocards (GeforceFX / Radeon >9500/X*00) only support pixel shader 2.0/2.0a/2.0b. /* Older DX9-class videocards (GeforceFX / Radeon >9500/X*00) only support pixel shader 2.0/2.0a/2.0b.
* In OpenGL the extensions related to GLSL abstract lowlevel GL info away which is needed * In OpenGL the extensions related to GLSL abstract lowlevel GL info away which is needed
...@@ -4136,12 +4167,7 @@ static void shader_glsl_get_caps(WINED3DDEVTYPE devtype, const WineD3D_GL_Info * ...@@ -4136,12 +4167,7 @@ static void shader_glsl_get_caps(WINED3DDEVTYPE devtype, const WineD3D_GL_Info *
else else
pCaps->PixelShaderVersion = WINED3DPS_VERSION(3,0); pCaps->PixelShaderVersion = WINED3DPS_VERSION(3,0);
/* Subtract the other potential uniforms from the max available (bools & ints), and 2 states for fog. pCaps->MaxPixelShaderConst = GL_LIMITS(pshader_constantsF);
* In theory the texbem instruction may need one more shader constant too. But lets assume
* that a sm <= 1.3 shader does not need all the uniforms provided by a glsl-capable card,
* and lets not take away a uniform needlessly from all other shaders.
*/
pCaps->MaxPixelShaderConst = GL_LIMITS(pshader_constantsF) - (MAX_CONST_B / 4) - MAX_CONST_I - 2;
/* FIXME: The following line is card dependent. -8.0 to 8.0 is the /* FIXME: The following line is card dependent. -8.0 to 8.0 is the
* Direct3D minimum requirement. * Direct3D minimum requirement.
......
...@@ -2244,6 +2244,7 @@ BOOL getDepthStencilBits(const struct GlPixelFormatDesc *format_desc, short *dep ...@@ -2244,6 +2244,7 @@ BOOL getDepthStencilBits(const struct GlPixelFormatDesc *format_desc, short *dep
/* Math utils */ /* Math utils */
void multiply_matrix(WINED3DMATRIX *dest, const WINED3DMATRIX *src1, const WINED3DMATRIX *src2); void multiply_matrix(WINED3DMATRIX *dest, const WINED3DMATRIX *src1, const WINED3DMATRIX *src2);
UINT wined3d_log2i(UINT32 x); UINT wined3d_log2i(UINT32 x);
unsigned int count_bits(unsigned int mask);
typedef struct local_constant { typedef struct local_constant {
struct list entry; struct list entry;
...@@ -2318,7 +2319,7 @@ typedef struct IWineD3DBaseShaderClass ...@@ -2318,7 +2319,7 @@ typedef struct IWineD3DBaseShaderClass
UINT functionLength; UINT functionLength;
UINT cur_loop_depth, cur_loop_regno; UINT cur_loop_depth, cur_loop_regno;
BOOL load_local_constsF; BOOL load_local_constsF;
BOOL uses_bool_consts, uses_int_consts; BOOL num_bool_consts, num_int_consts;
/* Type of shader backend */ /* Type of shader backend */
int shader_mode; int shader_mode;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment