Commit 37d1e8b7, authored by Henri Verbeet, committed by Alexandre Julliard

wined3d: Try to avoid redundant constant updates.

This gives a minor performance improvement. For example, in 3DMark03 Game Test 1 the improvement is about 4%, and in the Counter-Strike: Source stress test it is about 1% (NVIDIA GF9600M, Intel T9550, 1024x768).
parent c1af4050
......@@ -648,7 +648,7 @@ static void shader_arb_vs_local_constants(const struct arb_vs_compiled_shader *g
checkGLcall("Load vs int consts");
}
static void shader_arb_select(void *shader_priv, const struct wined3d_context *context,
static void shader_arb_select(void *shader_priv, struct wined3d_context *context,
const struct wined3d_state *state);
/**
......@@ -659,7 +659,7 @@ static void shader_arb_select(void *shader_priv, const struct wined3d_context *c
*/
/* Context activation is done by the caller (state handler). */
static void shader_arb_load_constants_internal(struct shader_arb_priv *priv,
const struct wined3d_context *context, const struct wined3d_state *state,
struct wined3d_context *context, const struct wined3d_state *state,
BOOL usePixelShader, BOOL useVertexShader, BOOL from_shader_select)
{
const struct wined3d_d3d_info *d3d_info = context->d3d_info;
......@@ -723,7 +723,7 @@ static void shader_arb_load_constants_internal(struct shader_arb_priv *priv,
}
}
static void shader_arb_load_constants(void *shader_priv, const struct wined3d_context *context,
static void shader_arb_load_constants(void *shader_priv, struct wined3d_context *context,
const struct wined3d_state *state)
{
BOOL vs = use_vs(state);
......@@ -736,6 +736,12 @@ static void shader_arb_update_float_vertex_constants(struct wined3d_device *devi
{
struct wined3d_context *context = context_get_current();
struct shader_arb_priv *priv = device->shader_priv;
unsigned int i;
for (i = 0; i < device->context_count; ++i)
{
device->contexts[i]->constant_update_mask |= WINED3D_SHADER_CONST_VS_F;
}
/* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active
* context. On a context switch the old context will be fully dirtified */
......@@ -749,6 +755,12 @@ static void shader_arb_update_float_pixel_constants(struct wined3d_device *devic
{
struct wined3d_context *context = context_get_current();
struct shader_arb_priv *priv = device->shader_priv;
unsigned int i;
for (i = 0; i < device->context_count; ++i)
{
device->contexts[i]->constant_update_mask |= WINED3D_SHADER_CONST_PS_F;
}
/* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active
* context. On a context switch the old context will be fully dirtified */
......@@ -4629,7 +4641,7 @@ static void find_arb_vs_compile_args(const struct wined3d_state *state,
}
/* Context activation is done by the caller. */
static void shader_arb_select(void *shader_priv, const struct wined3d_context *context,
static void shader_arb_select(void *shader_priv, struct wined3d_context *context,
const struct wined3d_state *state)
{
struct shader_arb_priv *priv = shader_priv;
......@@ -4685,6 +4697,9 @@ static void shader_arb_select(void *shader_priv, const struct wined3d_context *c
/* Force constant reloading for the NP2 fixup (see comment in shader_glsl_select for more info) */
if (compiled->np2fixup_info.super.active)
shader_arb_load_np2fixup_constants(priv, gl_info, state);
if (ps->load_local_constsF)
context->constant_update_mask |= WINED3D_SHADER_CONST_PS_F;
}
else
{
......@@ -4746,6 +4761,9 @@ static void shader_arb_select(void *shader_priv, const struct wined3d_context *c
FIXME("vertex color clamp needs to be changed, but extension not supported.\n");
}
}
if (vs->load_local_constsF)
context->constant_update_mask |= WINED3D_SHADER_CONST_VS_F;
}
else
{
......@@ -5882,21 +5900,16 @@ static void set_bumpmat_arbfp(struct wined3d_context *context, const struct wine
const struct wined3d_gl_info *gl_info = context->gl_info;
float mat[2][2];
if (use_ps(state))
{
/* The pixel shader has to know the bump env matrix. Do a constants
* update. */
if (stage && (state->pixel_shader->reg_maps.bumpmat & (1 << stage)))
context->load_constants = 1;
context->constant_update_mask |= WINED3D_SHADER_CONST_PS_BUMP_ENV;
if(device->shader_backend == &arb_program_shader_backend) {
/* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants */
return;
}
}
else if (device->shader_backend == &arb_program_shader_backend)
if (device->shader_backend == &arb_program_shader_backend)
{
struct shader_arb_priv *priv = device->shader_priv;
/* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants. */
if (use_ps(state))
return;
priv->pshader_const_dirty[ARB_FFP_CONST_BUMPMAT(stage)] = 1;
priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_BUMPMAT(stage) + 1);
}
......@@ -5918,21 +5931,16 @@ static void tex_bumpenvlum_arbfp(struct wined3d_context *context,
const struct wined3d_gl_info *gl_info = context->gl_info;
float param[4];
if (use_ps(state))
{
/* The pixel shader has to know the luminance offset. Do a constants
* update. */
if (stage && (state->pixel_shader->reg_maps.luminanceparams & (1 << stage)))
context->load_constants = 1;
context->constant_update_mask |= WINED3D_SHADER_CONST_PS_BUMP_ENV;
if(device->shader_backend == &arb_program_shader_backend) {
/* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants */
return;
}
}
else if (device->shader_backend == &arb_program_shader_backend)
if (device->shader_backend == &arb_program_shader_backend)
{
struct shader_arb_priv *priv = device->shader_priv;
/* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants. */
if (use_ps(state))
return;
priv->pshader_const_dirty[ARB_FFP_CONST_LUMINANCE(stage)] = 1;
priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_LUMINANCE(stage) + 1);
}
......@@ -6536,7 +6544,6 @@ static void fragment_prog_arbfp(struct wined3d_context *context, const struct wi
}
context->select_shader = 1;
context->load_constants = 1;
}
/* We can't link the fog states to the fragment state directly since the
......
......@@ -943,7 +943,6 @@ static void atifs_apply_pixelshader(struct wined3d_context *context, const struc
* states. If atifs can deal with this it keeps the rest of the code
* simpler. */
context->select_shader = 1;
context->load_constants = 1;
}
static void atifs_srgbwriteenable(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
......
......@@ -1909,7 +1909,6 @@ static void SetupForBlit(const struct wined3d_device *device, struct wined3d_con
/* Disable shaders */
device->shader_backend->shader_disable(device->shader_priv, context);
context->select_shader = 1;
context->load_constants = 1;
context->blit_w = rt_size.cx;
context->blit_h = rt_size.cy;
......@@ -2393,11 +2392,10 @@ BOOL context_apply_draw_state(struct wined3d_context *context, struct wined3d_de
context->select_shader = 0;
}
if (context->load_constants)
if (context->constant_update_mask)
{
device->shader_backend->shader_load_constants(device->shader_priv,
context, state);
context->load_constants = 0;
device->shader_backend->shader_load_constants(device->shader_priv, context, state);
context->constant_update_mask = 0;
}
if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
......
......@@ -2518,6 +2518,16 @@ struct wined3d_sampler * CDECL wined3d_device_get_vs_sampler(const struct wined3
return device->stateBlock->state.vs_sampler[idx];
}
/* Flag shader constants as needing a reload on every context of a device.
 *
 * device: device whose contexts[0 .. context_count) are visited.
 * mask:   combination of WINED3D_SHADER_CONST_* bits; it is OR'ed into each
 *         context's constant_update_mask, so bits that are already set are
 *         left untouched. */
void device_invalidate_shader_constants(const struct wined3d_device *device, DWORD mask)
{
    UINT idx = 0;

    while (idx < device->context_count)
    {
        device->contexts[idx]->constant_update_mask |= mask;
        ++idx;
    }
}
HRESULT CDECL wined3d_device_set_vs_consts_b(struct wined3d_device *device,
UINT start_register, const BOOL *constants, UINT bool_count)
{
......@@ -2538,7 +2548,7 @@ HRESULT CDECL wined3d_device_set_vs_consts_b(struct wined3d_device *device,
device->updateStateBlock->changed.vertexShaderConstantsB |= (1 << i);
if (!device->isRecordingState)
device_invalidate_state(device, STATE_VERTEXSHADERCONSTANT);
device_invalidate_shader_constants(device, WINED3D_SHADER_CONST_VS_B);
return WINED3D_OK;
}
......@@ -2581,7 +2591,7 @@ HRESULT CDECL wined3d_device_set_vs_consts_i(struct wined3d_device *device,
device->updateStateBlock->changed.vertexShaderConstantsI |= (1 << i);
if (!device->isRecordingState)
device_invalidate_state(device, STATE_VERTEXSHADERCONSTANT);
device_invalidate_shader_constants(device, WINED3D_SHADER_CONST_VS_I);
return WINED3D_OK;
}
......@@ -2628,10 +2638,7 @@ HRESULT CDECL wined3d_device_set_vs_consts_f(struct wined3d_device *device,
}
if (!device->isRecordingState)
{
device->shader_backend->shader_update_float_vertex_constants(device, start_register, vector4f_count);
device_invalidate_state(device, STATE_VERTEXSHADERCONSTANT);
}
memset(device->updateStateBlock->changed.vertexShaderConstantsF + start_register, 1,
sizeof(*device->updateStateBlock->changed.vertexShaderConstantsF) * vector4f_count);
......@@ -3009,7 +3016,7 @@ HRESULT CDECL wined3d_device_set_ps_consts_b(struct wined3d_device *device,
device->updateStateBlock->changed.pixelShaderConstantsB |= (1 << i);
if (!device->isRecordingState)
device_invalidate_state(device, STATE_PIXELSHADERCONSTANT);
device_invalidate_shader_constants(device, WINED3D_SHADER_CONST_PS_B);
return WINED3D_OK;
}
......@@ -3052,7 +3059,7 @@ HRESULT CDECL wined3d_device_set_ps_consts_i(struct wined3d_device *device,
device->updateStateBlock->changed.pixelShaderConstantsI |= (1 << i);
if (!device->isRecordingState)
device_invalidate_state(device, STATE_PIXELSHADERCONSTANT);
device_invalidate_shader_constants(device, WINED3D_SHADER_CONST_PS_I);
return WINED3D_OK;
}
......@@ -3100,10 +3107,7 @@ HRESULT CDECL wined3d_device_set_ps_consts_f(struct wined3d_device *device,
}
if (!device->isRecordingState)
{
device->shader_backend->shader_update_float_pixel_constants(device, start_register, vector4f_count);
device_invalidate_state(device, STATE_PIXELSHADERCONSTANT);
}
memset(device->updateStateBlock->changed.pixelShaderConstantsF + start_register, 1,
sizeof(*device->updateStateBlock->changed.pixelShaderConstantsF) * vector4f_count);
......
......@@ -1509,7 +1509,7 @@ static void shader_none_select_depth_blt(void *shader_priv, const struct wined3d
static void shader_none_deselect_depth_blt(void *shader_priv, const struct wined3d_gl_info *gl_info) {}
static void shader_none_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count) {}
static void shader_none_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count) {}
static void shader_none_load_constants(void *shader_priv, const struct wined3d_context *context,
static void shader_none_load_constants(void *shader_priv, struct wined3d_context *context,
const struct wined3d_state *state) {}
static void shader_none_load_np2fixup_constants(void *shader_priv,
const struct wined3d_gl_info *gl_info, const struct wined3d_state *state) {}
......@@ -1517,7 +1517,7 @@ static void shader_none_destroy(struct wined3d_shader *shader) {}
static void shader_none_context_destroyed(void *shader_priv, const struct wined3d_context *context) {}
/* Context activation is done by the caller. */
static void shader_none_select(void *shader_priv, const struct wined3d_context *context,
static void shader_none_select(void *shader_priv, struct wined3d_context *context,
const struct wined3d_state *state)
{
const struct wined3d_gl_info *gl_info = context->gl_info;
......
......@@ -2586,10 +2586,6 @@ const char *debug_d3dstate(DWORD state)
return "STATE_GEOMETRY_SHADER";
if (STATE_IS_VIEWPORT(state))
return "STATE_VIEWPORT";
if (STATE_IS_VERTEXSHADERCONSTANT(state))
return "STATE_VERTEXSHADERCONSTANT";
if (STATE_IS_PIXELSHADERCONSTANT(state))
return "STATE_PIXELSHADERCONSTANT";
if (STATE_IS_LIGHT_TYPE(state))
return "STATE_LIGHT_TYPE";
if (STATE_IS_ACTIVELIGHT(state))
......
......@@ -283,6 +283,17 @@ enum wined3d_sampler_texture_type
WINED3DSTT_VOLUME = 4,
};
/* Bit flags that are OR'ed into wined3d_context.constant_update_mask to
 * record which groups of shader constants are out of date. Each flag
 * occupies its own bit, so they can be combined freely.
 *
 * Uses visible in this change:
 *   VS_F / PS_F     - set by the ARB backend's float vertex / pixel constant
 *                     update hooks, and on shader select when the shader has
 *                     load_local_constsF set.
 *   VS_I / VS_B     - set by wined3d_device_set_vs_consts_i() / _b().
 *   PS_I / PS_B     - set by wined3d_device_set_ps_consts_i() / _b().
 *   PS_BUMP_ENV     - set by the bump environment matrix and luminance
 *                     state handlers.
 *
 * NOTE(review): VS_POS_FIXUP, PS_Y_CORR and FFP_PS are not set anywhere in
 * the visible hunks; their meaning is presumably what the names suggest -
 * confirm against their users. */
#define WINED3D_SHADER_CONST_VS_F 0x00000001
#define WINED3D_SHADER_CONST_VS_I 0x00000002
#define WINED3D_SHADER_CONST_VS_B 0x00000004
#define WINED3D_SHADER_CONST_VS_POS_FIXUP 0x00000008
#define WINED3D_SHADER_CONST_PS_F 0x00000010
#define WINED3D_SHADER_CONST_PS_I 0x00000020
#define WINED3D_SHADER_CONST_PS_B 0x00000040
#define WINED3D_SHADER_CONST_PS_BUMP_ENV 0x00000080
#define WINED3D_SHADER_CONST_PS_Y_CORR 0x00000100
#define WINED3D_SHADER_CONST_FFP_PS 0x00000200
enum wined3d_shader_register_type
{
WINED3DSPR_TEMP = 0,
......@@ -796,7 +807,7 @@ struct wined3d_vertex_pipe_ops;
struct wined3d_shader_backend_ops
{
void (*shader_handle_instruction)(const struct wined3d_shader_instruction *);
void (*shader_select)(void *shader_priv, const struct wined3d_context *context,
void (*shader_select)(void *shader_priv, struct wined3d_context *context,
const struct wined3d_state *state);
void (*shader_disable)(void *shader_priv, const struct wined3d_context *context);
void (*shader_select_depth_blt)(void *shader_priv, const struct wined3d_gl_info *gl_info,
......@@ -804,7 +815,7 @@ struct wined3d_shader_backend_ops
void (*shader_deselect_depth_blt)(void *shader_priv, const struct wined3d_gl_info *gl_info);
void (*shader_update_float_vertex_constants)(struct wined3d_device *device, UINT start, UINT count);
void (*shader_update_float_pixel_constants)(struct wined3d_device *device, UINT start, UINT count);
void (*shader_load_constants)(void *shader_priv, const struct wined3d_context *context,
void (*shader_load_constants)(void *shader_priv, struct wined3d_context *context,
const struct wined3d_state *state);
void (*shader_load_np2fixup_constants)(void *shader_priv, const struct wined3d_gl_info *gl_info,
const struct wined3d_state *state);
......@@ -982,12 +993,7 @@ extern glMultiTexCoordFunc multi_texcoord_funcs[WINED3D_FFP_EMIT_COUNT] DECLSPEC
#define STATE_VIEWPORT (STATE_GEOMETRY_SHADER + 1)
#define STATE_IS_VIEWPORT(a) ((a) == STATE_VIEWPORT)
#define STATE_VERTEXSHADERCONSTANT (STATE_VIEWPORT + 1)
#define STATE_PIXELSHADERCONSTANT (STATE_VERTEXSHADERCONSTANT + 1)
#define STATE_IS_VERTEXSHADERCONSTANT(a) ((a) == STATE_VERTEXSHADERCONSTANT)
#define STATE_IS_PIXELSHADERCONSTANT(a) ((a) == STATE_PIXELSHADERCONSTANT)
#define STATE_LIGHT_TYPE (STATE_PIXELSHADERCONSTANT + 1)
#define STATE_LIGHT_TYPE (STATE_VIEWPORT + 1)
#define STATE_IS_LIGHT_TYPE(a) ((a) == STATE_LIGHT_TYPE)
#define STATE_ACTIVELIGHT(a) (STATE_LIGHT_TYPE + 1 + (a))
#define STATE_IS_ACTIVELIGHT(a) ((a) >= STATE_ACTIVELIGHT(0) && (a) < STATE_ACTIVELIGHT(MAX_ACTIVE_LIGHTS))
......@@ -1096,10 +1102,9 @@ struct wined3d_context
DWORD destroyed : 1;
DWORD valid : 1;
DWORD select_shader : 1;
DWORD load_constants : 1;
DWORD padding : 15;
BYTE texShaderBumpMap; /* MAX_TEXTURES, 8 */
BYTE lastWasPow2Texture; /* MAX_TEXTURES, 8 */
DWORD texShaderBumpMap : 8; /* MAX_TEXTURES, 8 */
DWORD lastWasPow2Texture : 8; /* MAX_TEXTURES, 8 */
DWORD constant_update_mask;
DWORD numbered_array_mask;
GLenum tracking_parm; /* Which source is tracking current colour */
GLenum untracked_materials[2];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment