Commit de12f880, authored by Stefan Dösinger and committed by Alexandre Julliard

wined3d: Don't enable the NV frag extensions if we don't need them.

Enabling the NV extensions occupies a temp register for some reason. Avoid needlessly enabling them.
parent 5a07bacd
...@@ -2058,6 +2058,7 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This, ...@@ -2058,6 +2058,7 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This,
DWORD *lconst_map = local_const_mapping((IWineD3DBaseShaderImpl *) This); DWORD *lconst_map = local_const_mapping((IWineD3DBaseShaderImpl *) This);
struct shader_arb_ctx_priv priv_ctx; struct shader_arb_ctx_priv priv_ctx;
BOOL dcl_tmp = args->super.srgb_correction, dcl_td = FALSE; BOOL dcl_tmp = args->super.srgb_correction, dcl_td = FALSE;
BOOL want_nv_prog = FALSE;
char srgbtmp[4][4]; char srgbtmp[4][4];
unsigned int i, found = 0; unsigned int i, found = 0;
...@@ -2103,14 +2104,38 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This, ...@@ -2103,14 +2104,38 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This,
priv_ctx.cur_ps_args = args; priv_ctx.cur_ps_args = args;
list_init(&priv_ctx.if_frames); list_init(&priv_ctx.if_frames);
/* Avoid enabling NV_fragment_program* if we do not need it.
*
* Enabling GL_NV_fragment_program_option causes the driver to occupy a temporary register,
* and it slows down the shader execution noticeably (about 5%). Usually our instruction emulation
* is faster than what we gain from using higher native instructions. There are some things though
* that cannot be emulated. In that case enable the extensions.
* If the extension is enabled, instruction handlers that support both ways will use it.
*
* Testing shows no performance difference between OPTION NV_fragment_program2 and NV_fragment_program.
* So enable the best we can get.
*/
if(reg_maps->usesdsx || reg_maps->usesdsy || reg_maps->loop_depth > 0)
{
want_nv_prog = TRUE;
}
shader_addline(buffer, "!!ARBfp1.0\n"); shader_addline(buffer, "!!ARBfp1.0\n");
if(GL_SUPPORT(NV_FRAGMENT_PROGRAM2)) { if(want_nv_prog && GL_SUPPORT(NV_FRAGMENT_PROGRAM2)) {
shader_addline(buffer, "OPTION NV_fragment_program2;\n"); shader_addline(buffer, "OPTION NV_fragment_program2;\n");
priv_ctx.target_version = NV3; priv_ctx.target_version = NV3;
} else if(GL_SUPPORT(NV_FRAGMENT_PROGRAM_OPTION)) { } else if(want_nv_prog && GL_SUPPORT(NV_FRAGMENT_PROGRAM_OPTION)) {
shader_addline(buffer, "OPTION NV_fragment_program;\n"); shader_addline(buffer, "OPTION NV_fragment_program;\n");
priv_ctx.target_version = NV2; priv_ctx.target_version = NV2;
} else { } else {
if(want_nv_prog)
{
/* This is an error - either we're advertising the wrong shader version, or aren't enforcing some
* limits properly
*/
ERR("The shader requires instructions that are not available in plain GL_ARB_fragment_program\n");
ERR("Try GLSL\n");
}
priv_ctx.target_version = ARB; priv_ctx.target_version = ARB;
} }
...@@ -2131,6 +2156,10 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This, ...@@ -2131,6 +2156,10 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This,
} }
} }
/* For now always declare the temps. At least the Nvidia assembler optimizes completely
* unused temps away (but occupies them for the whole shader if they're used once). Always
* declaring them avoids tricky bookkeeping work
*/
shader_addline(buffer, "TEMP TA;\n"); /* Used for modifiers */ shader_addline(buffer, "TEMP TA;\n"); /* Used for modifiers */
shader_addline(buffer, "TEMP TB;\n"); /* Used for modifiers */ shader_addline(buffer, "TEMP TB;\n"); /* Used for modifiers */
shader_addline(buffer, "TEMP TC;\n"); /* Used for modifiers */ shader_addline(buffer, "TEMP TC;\n"); /* Used for modifiers */
...@@ -2227,6 +2256,9 @@ static GLuint shader_arb_generate_vshader(IWineD3DVertexShaderImpl *This, ...@@ -2227,6 +2256,9 @@ static GLuint shader_arb_generate_vshader(IWineD3DVertexShaderImpl *This,
/* Create the hw ARB shader */ /* Create the hw ARB shader */
shader_addline(buffer, "!!ARBvp1.0\n"); shader_addline(buffer, "!!ARBvp1.0\n");
/* Always enable the NV extension if available. Unlike fragment shaders, there is no
* measurable performance penalty, and we can always make use of it for clip planes.
*/
if(GL_SUPPORT(NV_VERTEX_PROGRAM2_OPTION)) { if(GL_SUPPORT(NV_VERTEX_PROGRAM2_OPTION)) {
shader_addline(buffer, "OPTION NV_vertex_program2;\n"); shader_addline(buffer, "OPTION NV_vertex_program2;\n");
priv_ctx.target_version = NV2; priv_ctx.target_version = NV2;
......
...@@ -683,6 +683,10 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3 ...@@ -683,6 +683,10 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3
{ {
reg_maps->usesdsy = 1; reg_maps->usesdsy = 1;
} }
else if (ins.handler_idx == WINED3DSIH_DSX)
{
reg_maps->usesdsx = 1;
}
else if(ins.handler_idx == WINED3DSIH_TEXLDD) else if(ins.handler_idx == WINED3DSIH_TEXLDD)
{ {
reg_maps->usestexldd = 1; reg_maps->usestexldd = 1;
......
...@@ -632,7 +632,7 @@ typedef struct shader_reg_maps ...@@ -632,7 +632,7 @@ typedef struct shader_reg_maps
WINED3DSAMPLER_TEXTURE_TYPE sampler_type[max(MAX_FRAGMENT_SAMPLERS, MAX_VERTEX_SAMPLERS)]; WINED3DSAMPLER_TEXTURE_TYPE sampler_type[max(MAX_FRAGMENT_SAMPLERS, MAX_VERTEX_SAMPLERS)];
BOOL bumpmat[MAX_TEXTURES], luminanceparams[MAX_TEXTURES]; BOOL bumpmat[MAX_TEXTURES], luminanceparams[MAX_TEXTURES];
char usesnrm, vpos, usesdsy, usestexldd, usesmova; char usesnrm, vpos, usesdsx, usesdsy, usestexldd, usesmova;
char usesrelconstF; char usesrelconstF;
/* Whether or not loops are used in this shader, and nesting depth */ /* Whether or not loops are used in this shader, and nesting depth */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment