Commit e32b8e81, authored by Matteo Bruni, committed by Alexandre Julliard

wined3d: Optimize scanning changed shader constants in wined3d_device_apply_stateblock().

parent 8d87bce0
...@@ -3835,13 +3835,14 @@ struct wined3d_texture * CDECL wined3d_device_get_texture(const struct wined3d_d ...@@ -3835,13 +3835,14 @@ struct wined3d_texture * CDECL wined3d_device_get_texture(const struct wined3d_d
void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
struct wined3d_stateblock *stateblock) struct wined3d_stateblock *stateblock)
{ {
const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info;
const struct wined3d_stateblock_state *state = &stateblock->stateblock_state; const struct wined3d_stateblock_state *state = &stateblock->stateblock_state;
const struct wined3d_saved_states *changed = &stateblock->changed; const struct wined3d_saved_states *changed = &stateblock->changed;
struct wined3d_blend_state *blend_state; struct wined3d_blend_state *blend_state;
struct wined3d_color colour; struct wined3d_color colour;
unsigned int i, j, count; struct wined3d_range range;
unsigned int i, j, start;
BOOL set_blend_state; BOOL set_blend_state;
DWORD map;
TRACE("device %p, stateblock %p.\n", device, stateblock); TRACE("device %p, stateblock %p.\n", device, stateblock);
...@@ -3850,89 +3851,57 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, ...@@ -3850,89 +3851,57 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
if (changed->pixelShader) if (changed->pixelShader)
wined3d_device_set_pixel_shader(device, state->ps); wined3d_device_set_pixel_shader(device, state->ps);
count = 0; for (start = 0; ; start = range.offset + range.size)
for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i)
{ {
if (wined3d_bitmap_is_set(changed->vs_consts_f, i)) if (!wined3d_bitmap_get_range(changed->vs_consts_f, WINED3D_MAX_VS_CONSTS_F, start, &range))
++count; break;
else if (count)
{ wined3d_device_set_vs_consts_f(device, range.offset, range.size, &state->vs_consts_f[range.offset]);
wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
count = 0;
}
} }
if (count)
wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
count = 0; map = changed->vertexShaderConstantsI;
for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) for (start = 0; ; start = range.offset + range.size)
{ {
if (changed->vertexShaderConstantsB & (1u << i)) if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_I, start, &range))
++count; break;
else if (count)
{ wined3d_device_set_vs_consts_i(device, range.offset, range.size, &state->vs_consts_i[range.offset]);
wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
count = 0;
}
} }
if (count)
wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
count = 0; map = changed->vertexShaderConstantsB;
for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) for (start = 0; ; start = range.offset + range.size)
{ {
if (changed->vertexShaderConstantsI & (1u << i)) if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_B, start, &range))
++count; break;
else if (count)
{ wined3d_device_set_vs_consts_b(device, range.offset, range.size, &state->vs_consts_b[range.offset]);
wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
count = 0;
}
} }
if (count)
wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
count = 0; for (start = 0; ; start = range.offset + range.size)
for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i)
{ {
if (wined3d_bitmap_is_set(changed->ps_consts_f, i)) if (!wined3d_bitmap_get_range(changed->ps_consts_f, WINED3D_MAX_PS_CONSTS_F, start, &range))
++count; break;
else if (count)
{ wined3d_device_set_ps_consts_f(device, range.offset, range.size, &state->ps_consts_f[range.offset]);
wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
count = 0;
}
} }
if (count)
wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
count = 0; map = changed->pixelShaderConstantsI;
for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) for (start = 0; ; start = range.offset + range.size)
{ {
if (changed->pixelShaderConstantsB & (1u << i)) if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_I, start, &range))
++count; break;
else if (count)
{ wined3d_device_set_ps_consts_i(device, range.offset, range.size, &state->ps_consts_i[range.offset]);
wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
count = 0;
}
} }
if (count)
wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
count = 0; map = changed->pixelShaderConstantsB;
for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) for (start = 0; ; start = range.offset + range.size)
{ {
if (changed->pixelShaderConstantsI & (1u << i)) if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_B, start, &range))
++count; break;
else if (count)
{ wined3d_device_set_ps_consts_b(device, range.offset, range.size, &state->ps_consts_b[range.offset]);
wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
count = 0;
}
} }
if (count)
wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
for (i = 0; i < ARRAY_SIZE(state->light_state->light_map); ++i) for (i = 0; i < ARRAY_SIZE(state->light_state->light_map); ++i)
{ {
......
...@@ -3196,13 +3196,13 @@ struct wined3d_state ...@@ -3196,13 +3196,13 @@ struct wined3d_state
struct wined3d_shader_resource_view *shader_resource_view[WINED3D_SHADER_TYPE_COUNT][MAX_SHADER_RESOURCE_VIEWS]; struct wined3d_shader_resource_view *shader_resource_view[WINED3D_SHADER_TYPE_COUNT][MAX_SHADER_RESOURCE_VIEWS];
struct wined3d_unordered_access_view *unordered_access_view[WINED3D_PIPELINE_COUNT][MAX_UNORDERED_ACCESS_VIEWS]; struct wined3d_unordered_access_view *unordered_access_view[WINED3D_PIPELINE_COUNT][MAX_UNORDERED_ACCESS_VIEWS];
BOOL vs_consts_b[WINED3D_MAX_CONSTS_B];
struct wined3d_ivec4 vs_consts_i[WINED3D_MAX_CONSTS_I];
struct wined3d_vec4 vs_consts_f[WINED3D_MAX_VS_CONSTS_F]; struct wined3d_vec4 vs_consts_f[WINED3D_MAX_VS_CONSTS_F];
struct wined3d_ivec4 vs_consts_i[WINED3D_MAX_CONSTS_I];
BOOL vs_consts_b[WINED3D_MAX_CONSTS_B];
BOOL ps_consts_b[WINED3D_MAX_CONSTS_B];
struct wined3d_ivec4 ps_consts_i[WINED3D_MAX_CONSTS_I];
struct wined3d_vec4 ps_consts_f[WINED3D_MAX_PS_CONSTS_F]; struct wined3d_vec4 ps_consts_f[WINED3D_MAX_PS_CONSTS_F];
struct wined3d_ivec4 ps_consts_i[WINED3D_MAX_CONSTS_I];
BOOL ps_consts_b[WINED3D_MAX_CONSTS_B];
struct wined3d_texture *textures[WINED3D_MAX_COMBINED_SAMPLERS]; struct wined3d_texture *textures[WINED3D_MAX_COMBINED_SAMPLERS];
DWORD sampler_states[WINED3D_MAX_COMBINED_SAMPLERS][WINED3D_HIGHEST_SAMPLER_STATE + 1]; DWORD sampler_states[WINED3D_MAX_COMBINED_SAMPLERS][WINED3D_HIGHEST_SAMPLER_STATE + 1];
...@@ -3914,6 +3914,12 @@ struct wined3d_vertex_declaration ...@@ -3914,6 +3914,12 @@ struct wined3d_vertex_declaration
struct wined3d_saved_states struct wined3d_saved_states
{ {
DWORD vs_consts_f[WINED3D_MAX_VS_CONSTS_F >> 5];
WORD vertexShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */
WORD vertexShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */
DWORD ps_consts_f[WINED3D_MAX_PS_CONSTS_F >> 5];
WORD pixelShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */
WORD pixelShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */
DWORD transform[(WINED3D_HIGHEST_TRANSFORM_STATE >> 5) + 1]; DWORD transform[(WINED3D_HIGHEST_TRANSFORM_STATE >> 5) + 1];
WORD streamSource; /* WINED3D_MAX_STREAMS, 16 */ WORD streamSource; /* WINED3D_MAX_STREAMS, 16 */
WORD streamFreq; /* WINED3D_MAX_STREAMS, 16 */ WORD streamFreq; /* WINED3D_MAX_STREAMS, 16 */
...@@ -3921,12 +3927,6 @@ struct wined3d_saved_states ...@@ -3921,12 +3927,6 @@ struct wined3d_saved_states
DWORD textureState[WINED3D_MAX_TEXTURES]; /* WINED3D_HIGHEST_TEXTURE_STATE + 1, 18 */ DWORD textureState[WINED3D_MAX_TEXTURES]; /* WINED3D_HIGHEST_TEXTURE_STATE + 1, 18 */
WORD samplerState[WINED3D_MAX_COMBINED_SAMPLERS]; /* WINED3D_HIGHEST_SAMPLER_STATE + 1, 14 */ WORD samplerState[WINED3D_MAX_COMBINED_SAMPLERS]; /* WINED3D_HIGHEST_SAMPLER_STATE + 1, 14 */
DWORD clipplane; /* WINED3D_MAX_USER_CLIP_PLANES, 32 */ DWORD clipplane; /* WINED3D_MAX_USER_CLIP_PLANES, 32 */
WORD pixelShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */
WORD pixelShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */
DWORD ps_consts_f[WINED3D_MAX_PS_CONSTS_F >> 5];
WORD vertexShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */
WORD vertexShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */
DWORD vs_consts_f[WINED3D_MAX_VS_CONSTS_F >> 5];
DWORD textures : 20; /* WINED3D_MAX_COMBINED_SAMPLERS, 20 */ DWORD textures : 20; /* WINED3D_MAX_COMBINED_SAMPLERS, 20 */
DWORD indices : 1; DWORD indices : 1;
DWORD material : 1; DWORD material : 1;
...@@ -5325,6 +5325,60 @@ static inline BOOL wined3d_bitmap_is_set(const uint32_t *map, unsigned int idx) ...@@ -5325,6 +5325,60 @@ static inline BOOL wined3d_bitmap_is_set(const uint32_t *map, unsigned int idx)
return map[idx >> 5] & (1u << (idx & 0x1f)); return map[idx >> 5] & (1u << (idx & 0x1f));
} }
/* Find the first bit at index >= "start" that is set in the bitmap after
 * every 32-bit word has been XORed with "xor_mask".
 *
 * bitmap:    array of 32-bit words holding the bits.
 * bit_count: number of valid bits; must be either smaller than one word or
 *            a multiple of the word size (asserted below).
 * start:     index of the first bit to consider.
 * xor_mask:  value XORed into each word before testing; 0 searches for set
 *            bits, ~0u searches for clear bits.
 *
 * Returns the index of the matching bit, or ~0u if there is no such bit
 * below "bit_count". */
static inline unsigned int wined3d_bitmap_ffs_xor(const uint32_t *bitmap, unsigned int bit_count,
        unsigned int start, uint32_t xor_mask)
{
    const unsigned int word_bit_count = sizeof(*bitmap) * CHAR_BIT;
    const uint32_t *ptr, *end_ptr;
    uint32_t map, mask;

    assert(bit_count < word_bit_count || !(bit_count % word_bit_count));

    ptr = bitmap + start / word_bit_count;
    end_ptr = bitmap + (bit_count + word_bit_count - 1) / word_bit_count;

    /* "start" lies in a word past the end of the bitmap (this also covers
     * bit_count == 0). */
    if (ptr >= end_ptr)
        return ~0u;

    /* Ignore the bits below "start" in the first word. */
    mask = ~0u << start % word_bit_count;
    /* A bitmap smaller than one word only owns its low "bit_count" bits.
     * Without this, stray high bits (or, with an inverting xor_mask, the
     * unused high zero bits) would be reported as matches at indices
     * >= bit_count. The shift is safe because bit_count < word_bit_count. */
    if (bit_count < word_bit_count)
        mask &= (1u << bit_count) - 1;

    map = (*ptr ^ xor_mask) & mask;
    while (!map)
    {
        if (++ptr == end_ptr)
            return ~0u;
        map = *ptr ^ xor_mask;
    }

    /* wined3d_bit_scan() is defined elsewhere; presumably it yields the index
     * of the lowest set bit of "map" - confirm against its definition. */
    return (ptr - bitmap) * word_bit_count + wined3d_bit_scan(&map);
}
/* Find the first set bit at index >= "start" in a bitmap of "bit_count"
 * bits. Thin wrapper around wined3d_bitmap_ffs_xor() with an XOR mask of
 * zero, i.e. the words are tested unmodified; the result is whatever that
 * helper returns. */
static inline unsigned int wined3d_bitmap_ffs(const uint32_t *bitmap, unsigned int bit_count, unsigned int start)
{
    const uint32_t keep_bits = 0;

    return wined3d_bitmap_ffs_xor(bitmap, bit_count, start, keep_bits);
}
/* Find the first clear bit at index >= "start" in a bitmap of "bit_count"
 * bits. Thin wrapper around wined3d_bitmap_ffs_xor() with an all-ones XOR
 * mask, so every word is inverted before being tested; the result is
 * whatever that helper returns. */
static inline unsigned int wined3d_bitmap_ffz(const uint32_t *bitmap, unsigned int bit_count, unsigned int start)
{
    const uint32_t invert_bits = ~0u;

    return wined3d_bitmap_ffs_xor(bitmap, bit_count, start, invert_bits);
}
/* Locate the next run of consecutive set bits in a bitmap.
 *
 * bitmap:    the bitmap to scan.
 * bit_count: number of bits in the bitmap.
 * start:     index at which to begin looking for a set bit.
 * range:     on success, receives the index of the first set bit found
 *            (offset) and the distance from it to the next clear bit (size).
 *            When wined3d_bitmap_ffz() reports no clear bit (~0u), the run
 *            is taken to extend to "bit_count".
 *
 * Returns FALSE, leaving "range" untouched, when wined3d_bitmap_ffs() finds
 * no set bit; TRUE otherwise. */
static inline BOOL wined3d_bitmap_get_range(const DWORD *bitmap, unsigned int bit_count,
        unsigned int start, struct wined3d_range *range)
{
    unsigned int first_set, first_clear;

    if ((first_set = wined3d_bitmap_ffs(bitmap, bit_count, start)) == ~0u)
        return FALSE;

    /* The bit at "first_set" is known to be set, so look for the end of the
     * run from the following bit onwards. */
    first_clear = wined3d_bitmap_ffz(bitmap, bit_count, first_set + 1);

    range->offset = first_set;
    range->size = (first_clear == ~0u ? bit_count : first_clear) - first_set;
    return TRUE;
}
/* The WNDCLASS-Name for the fake window which we use to retrieve the GL capabilities */ /* The WNDCLASS-Name for the fake window which we use to retrieve the GL capabilities */
#define WINED3D_OPENGL_WINDOW_CLASS_NAME "WineD3D_OpenGL" #define WINED3D_OPENGL_WINDOW_CLASS_NAME "WineD3D_OpenGL"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment