Commit 0bf32b12, authored by Stefan Dösinger, committed by Alexandre Julliard

wined3d: Add the ability to duplicate GL pixel shaders.

Some stateblock parameters have to be compiled into the GL pixel shader code, such as the lines that implement pixel format fixups. This causes problems when applications switch those settings, because each switch requires a recompilation of the shader. This patch enables wined3d to keep multiple GL shaders for a single D3D shader (pixel shaders only so far) to handle such switches more efficiently.
parent aed9305c
......@@ -1101,8 +1101,9 @@ static void shader_none_destroy(IWineD3DBaseShader *iface) {}
static HRESULT shader_none_alloc(IWineD3DDevice *iface) {return WINED3D_OK;}
static void shader_none_free(IWineD3DDevice *iface) {}
static BOOL shader_none_dirty_const(IWineD3DDevice *iface) {return FALSE;}
static void shader_none_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer) {
static GLuint shader_none_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer) {
FIXME("NONE shader backend asked to generate a pixel shader\n");
return 0;
}
static void shader_none_generate_vshader(IWineD3DVertexShader *iface, SHADER_BUFFER *buffer) {
FIXME("NONE shader backend asked to generate a vertex shader\n");
......
......@@ -73,8 +73,6 @@ struct glsl_shader_prog_link {
GLhandleARB bumpenvmat_location[MAX_TEXTURES];
GLhandleARB luminancescale_location[MAX_TEXTURES];
GLhandleARB luminanceoffset_location[MAX_TEXTURES];
GLhandleARB srgb_comparison_location;
GLhandleARB srgb_mul_low_location;
GLhandleARB ycorrection_location;
GLenum vertex_color_clamp;
GLhandleARB vshader;
......@@ -492,28 +490,6 @@ static void shader_glsl_load_constants(
}
}
if(((IWineD3DPixelShaderImpl *) pshader)->srgb_enabled &&
!((IWineD3DPixelShaderImpl *) pshader)->srgb_mode_hardcoded) {
float comparison[4];
float mul_low[4];
if(stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE]) {
comparison[0] = srgb_cmp; comparison[1] = srgb_cmp;
comparison[2] = srgb_cmp; comparison[3] = srgb_cmp;
mul_low[0] = srgb_mul_low; mul_low[1] = srgb_mul_low;
mul_low[2] = srgb_mul_low; mul_low[3] = srgb_mul_low;
} else {
comparison[0] = 1.0 / 0.0; comparison[1] = 1.0 / 0.0;
comparison[2] = 1.0 / 0.0; comparison[3] = 1.0 / 0.0;
mul_low[0] = 1.0; mul_low[1] = 1.0;
mul_low[2] = 1.0; mul_low[3] = 1.0;
}
GL_EXTCALL(glUniform4fvARB(prog->srgb_comparison_location, 1, comparison));
GL_EXTCALL(glUniform4fvARB(prog->srgb_mul_low_location, 1, mul_low));
}
if(((IWineD3DPixelShaderImpl *) pshader)->vpos_uniform) {
float correction_params[4];
if(deviceImpl->render_offscreen) {
......@@ -608,28 +584,11 @@ static void shader_generate_glsl_declarations(
}
if(device->stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE]) {
ps_impl->srgb_enabled = 1;
if(This->baseShader.limits.constant_float + extra_constants_needed + 1 < GL_LIMITS(pshader_constantsF)) {
shader_addline(buffer, "uniform vec4 srgb_mul_low;\n");
shader_addline(buffer, "uniform vec4 srgb_comparison;\n");
ps_impl->srgb_mode_hardcoded = 0;
extra_constants_needed++;
} else {
ps_impl->srgb_mode_hardcoded = 1;
shader_addline(buffer, "const vec4 srgb_mul_low = vec4(%f, %f, %f, %f);\n",
srgb_mul_low, srgb_mul_low, srgb_mul_low, srgb_mul_low);
shader_addline(buffer, "const vec4 srgb_comparison = vec4(%f, %f, %f, %f);\n",
srgb_cmp, srgb_cmp, srgb_cmp, srgb_cmp);
}
} else {
IWineD3DPixelShaderImpl *ps_impl = (IWineD3DPixelShaderImpl *) This;
/* Do not write any srgb fixup into the shader to save shader size and processing time.
* As a consequence, we can't toggle srgb write on without recompilation
*/
ps_impl->srgb_enabled = 0;
ps_impl->srgb_mode_hardcoded = 1;
}
if(reg_maps->vpos || reg_maps->usesdsy) {
if(This->baseShader.limits.constant_float + extra_constants_needed + 1 < GL_LIMITS(pshader_constantsF)) {
shader_addline(buffer, "uniform vec4 ycorrection;\n");
......@@ -2798,7 +2757,8 @@ static void pshader_glsl_dp2add(SHADER_OPCODE_ARG* arg) {
static void pshader_glsl_input_pack(
SHADER_BUFFER* buffer,
semantic* semantics_in,
IWineD3DPixelShader *iface) {
IWineD3DPixelShader *iface,
enum vertexprocessing_mode vertexprocessing) {
unsigned int i;
IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *) iface;
......@@ -2819,7 +2779,7 @@ static void pshader_glsl_input_pack(
switch(usage) {
case WINED3DDECLUSAGE_TEXCOORD:
if(usage_idx < 8 && This->vertexprocessing == pretransformed) {
if(usage_idx < 8 && vertexprocessing == pretransformed) {
shader_addline(buffer, "IN[%u]%s = gl_TexCoord[%u]%s;\n",
This->input_reg_map[i], reg_mask, usage_idx, reg_mask);
} else {
......@@ -3236,8 +3196,7 @@ static void set_glsl_shader_program(IWineD3DDevice *iface, BOOL use_ps, BOOL use
if(use_ps) {
struct ps_compile_args compile_args;
find_ps_compile_args((IWineD3DPixelShaderImpl*)This->stateBlock->pixelShader, This->stateBlock, &compile_args);
pixelshader_compile(pshader, &compile_args);
pshader_id = ((IWineD3DPixelShaderImpl*)pshader)->prgId;
pshader_id = find_gl_pshader((IWineD3DPixelShaderImpl *) pshader, &compile_args);
} else {
pshader_id = 0;
}
......@@ -3347,8 +3306,6 @@ static void set_glsl_shader_program(IWineD3DDevice *iface, BOOL use_ps, BOOL use
entry->posFixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "posFixup"));
entry->srgb_comparison_location = GL_EXTCALL(glGetUniformLocationARB(programId, "srgb_comparison"));
entry->srgb_mul_low_location = GL_EXTCALL(glGetUniformLocationARB(programId, "srgb_mul_low"));
entry->ycorrection_location = GL_EXTCALL(glGetUniformLocationARB(programId, "ycorrection"));
checkGLcall("Find glsl program uniform locations");
......@@ -3535,7 +3492,8 @@ static void shader_glsl_destroy(IWineD3DBaseShader *iface) {
IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)This->baseShader.device;
struct shader_glsl_priv *priv = (struct shader_glsl_priv *)device->shader_priv;
WineD3D_GL_Info *gl_info = &device->adapter->gl_info;
GLuint *prog;
IWineD3DPixelShaderImpl *ps = NULL;
IWineD3DVertexShaderImpl *vs = NULL;
/* Note: Do not use QueryInterface here to find out which shader type this is because this code
* can be called from IWineD3DBaseShader::Release
......@@ -3543,11 +3501,13 @@ static void shader_glsl_destroy(IWineD3DBaseShader *iface) {
char pshader = shader_is_pshader_version(This->baseShader.hex_version);
if(pshader) {
prog = &((IWineD3DPixelShaderImpl *) This)->prgId;
ps = (IWineD3DPixelShaderImpl *) This;
if(ps->num_gl_shaders == 0) return;
} else {
prog = &((IWineD3DVertexShaderImpl *) This)->prgId;
vs = (IWineD3DVertexShaderImpl *) This;
if(vs->prgId == 0) return;
}
if(*prog == 0) return;
linked_programs = &This->baseShader.linked_programs;
TRACE("Deleting linked programs\n");
......@@ -3565,11 +3525,28 @@ static void shader_glsl_destroy(IWineD3DBaseShader *iface) {
}
}
TRACE("Deleting shader object %u\n", *prog);
GL_EXTCALL(glDeleteObjectARB(*prog));
if(pshader) {
UINT i;
ENTER_GL();
for(i = 0; i < ps->num_gl_shaders; i++) {
TRACE("deleting pshader %u\n", ps->gl_shaders[i].prgId);
GL_EXTCALL(glDeleteObjectARB(ps->gl_shaders[i].prgId));
checkGLcall("glDeleteObjectARB");
*prog = 0;
This->baseShader.is_compiled = FALSE;
}
LEAVE_GL();
HeapFree(GetProcessHeap(), 0, ps->gl_shaders);
ps->gl_shaders = NULL;
ps->num_gl_shaders = 0;
} else {
TRACE("Deleting shader object %u\n", vs->prgId);
ENTER_GL();
GL_EXTCALL(glDeleteObjectARB(vs->prgId));
checkGLcall("glDeleteObjectARB");
LEAVE_GL();
vs->prgId = 0;
vs->baseShader.is_compiled = FALSE;
}
}
static unsigned int glsl_program_key_hash(void *key) {
......@@ -3626,7 +3603,7 @@ static BOOL shader_glsl_dirty_const(IWineD3DDevice *iface) {
return FALSE;
}
static void shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer) {
static GLuint shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer) {
IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
shader_reg_maps* reg_maps = &This->baseShader.reg_maps;
CONST DWORD *function = This->baseShader.function;
......@@ -3655,13 +3632,9 @@ static void shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFF
if (This->baseShader.hex_version >= WINED3DPS_VERSION(3,0)) {
if(((IWineD3DDeviceImpl *) This->baseShader.device)->strided_streams.u.s.position_transformed) {
This->vertexprocessing = pretransformed;
pshader_glsl_input_pack(buffer, This->semantics_in, iface);
pshader_glsl_input_pack(buffer, This->semantics_in, iface, pretransformed);
} else if(!use_vs((IWineD3DDeviceImpl *) This->baseShader.device)) {
This->vertexprocessing = fixedfunction;
pshader_glsl_input_pack(buffer, This->semantics_in, iface);
} else {
This->vertexprocessing = vertexshader;
pshader_glsl_input_pack(buffer, This->semantics_in, iface, fixedfunction);
}
}
......@@ -3682,7 +3655,7 @@ static void shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFF
} else {
fragcolor = "gl_FragColor";
}
if(This->srgb_enabled) {
if(((IWineD3DDeviceImpl *)This->baseShader.device)->stateBlock->renderState[WINED3DRS_SRGBWRITEENABLE]) {
shader_addline(buffer, "tmp0.xyz = pow(%s.xyz, vec3(%f, %f, %f)) * vec3(%f, %f, %f) - vec3(%f, %f, %f);\n",
fragcolor, srgb_pow, srgb_pow, srgb_pow, srgb_mul_high, srgb_mul_high, srgb_mul_high,
srgb_sub_high, srgb_sub_high, srgb_sub_high);
......@@ -3711,7 +3684,7 @@ static void shader_glsl_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFF
print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
/* Store the shader object */
This->prgId = shader_obj;
return shader_obj;
}
static void shader_glsl_generate_vshader(IWineD3DVertexShader *iface, SHADER_BUFFER *buffer) {
......
......@@ -272,32 +272,16 @@ static void pshader_set_limits(
/** Generate a pixel shader string using either GL_FRAGMENT_PROGRAM_ARB
or GLSL and send it to the card */
static inline VOID IWineD3DPixelShaderImpl_GenerateShader(
IWineD3DPixelShader *iface) {
IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
static inline GLuint IWineD3DPixelShaderImpl_GenerateShader(
IWineD3DPixelShaderImpl *This) {
SHADER_BUFFER buffer;
#if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
This->fixupVertexBufferSize = PGMSIZE;
This->fixupVertexBuffer[0] = 0;
}
buffer.buffer = This->device->fixupVertexBuffer;
#else
buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
#endif
buffer.bsize = 0;
buffer.lineNo = 0;
buffer.newline = TRUE;
((IWineD3DDeviceImpl *)This->baseShader.device)->shader_backend->shader_generate_pshader(iface, &buffer);
#if 1 /* if were using the data buffer of device then we don't need to free it */
HeapFree(GetProcessHeap(), 0, buffer.buffer);
#endif
return ((IWineD3DDeviceImpl *)This->baseShader.device)->shader_backend->shader_generate_pshader((IWineD3DPixelShader *) This, &buffer);
}
static HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
......@@ -384,89 +368,29 @@ static HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *i
return WINED3D_OK;
}
HRESULT pixelshader_compile(IWineD3DPixelShader *iface, struct ps_compile_args *args) {
IWineD3DPixelShaderImpl *This =(IWineD3DPixelShaderImpl *)iface;
IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
GLuint pixelshader_compile(IWineD3DPixelShaderImpl *This, struct ps_compile_args *args) {
CONST DWORD *function = This->baseShader.function;
UINT i, sampler;
HRESULT hr;
GLuint retval;
TRACE("(%p) : function %p\n", iface, function);
/* We're already compiled, but check if any of the hardcoded stateblock assumptions
* changed.
*/
if (This->baseShader.is_compiled) {
for(i = 0; i < This->baseShader.num_sampled_samplers; i++) {
sampler = This->baseShader.sampled_samplers[i];
if(args->format_conversion[sampler] != This->baseShader.sampled_format[sampler]) {
WARN("Recompiling shader %p due to format change on sampler %d\n", This, sampler);
WARN("Old format group %s, new is %s\n",
debug_d3dformat(This->baseShader.sampled_format[sampler]),
debug_d3dformat(args->format_conversion[sampler]));
goto recompile;
}
}
/* TODO: Check projected textures */
/* TODO: Check texture types(2D, Cube, 3D) */
if(args->srgb_correction != This->srgb_enabled && This->srgb_mode_hardcoded) {
WARN("Recompiling shader because srgb correction is different and hardcoded\n");
goto recompile;
}
if(This->baseShader.reg_maps.vpos && !This->vpos_uniform) {
if(This->render_offscreen != deviceImpl->render_offscreen ||
This->height != ((IWineD3DSurfaceImpl *) deviceImpl->render_targets[0])->currentDesc.Height) {
WARN("Recompiling shader because vpos is used, hard compiled and changed\n");
goto recompile;
}
}
if(This->baseShader.reg_maps.usesdsy && !This->vpos_uniform) {
if(This->render_offscreen ? 0 : 1 != deviceImpl->render_offscreen ? 0 : 1) {
WARN("Recompiling shader because dsy is used, hard compiled and render_offscreen changed\n");
goto recompile;
}
}
if(This->baseShader.hex_version >= WINED3DPS_VERSION(3,0)) {
if(args->vp_mode != This->vertexprocessing) {
WARN("Recompiling shader because the vertex processing mode changed\n");
goto recompile;
}
}
return WINED3D_OK;
recompile:
if(This->baseShader.recompile_count > 50) {
FIXME("Shader %p recompiled more than 50 times\n", This);
} else {
This->baseShader.recompile_count++;
}
TRACE("(%p) : function %p\n", This, function);
deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
hr = IWineD3DPixelShader_UpdateSamplers((IWineD3DPixelShader *) This);
if(FAILED(hr)) {
ERR("Failed to update sampler information\n");
return 0;
}
/* We don't need to compile */
if (!function) {
This->baseShader.is_compiled = TRUE;
return WINED3D_OK;
}
hr = IWineD3DPixelShader_UpdateSamplers(iface);
if(FAILED(hr)) return hr;
/* Reset fields tracking stateblock values being hardcoded in the shader */
This->baseShader.num_sampled_samplers = 0;
/* Generate the HW shader */
TRACE("(%p) : Generating hardware program\n", This);
IWineD3DPixelShaderImpl_GenerateShader(iface);
retval = IWineD3DPixelShaderImpl_GenerateShader(This);
This->baseShader.is_compiled = TRUE;
return WINED3D_OK;
return retval;
}
static HRESULT WINAPI IWineD3DPixelShaderImpl_UpdateSamplers(IWineD3DPixelShader *iface) {
......@@ -532,3 +456,35 @@ void find_ps_compile_args(IWineD3DPixelShaderImpl *shader, IWineD3DStateBlockImp
args->vp_mode = vertexshader;
}
}
/* Return the GL pixel shader object that matches the given compile args for this
 * D3D pixel shader, compiling and caching a new GL shader if none exists yet.
 *
 * shader: the D3D pixel shader whose gl_shaders cache is searched and extended.
 * args:   the compile arguments; compared bytewise against the cached entries.
 *
 * Returns the GL shader id of the matching (or newly compiled) shader, or 0 if the
 * cache could not be grown. A 0 returned by pixelshader_compile is cached and
 * returned as-is.
 */
GLuint find_gl_pshader(IWineD3DPixelShaderImpl *shader, struct ps_compile_args *args) {
    UINT i;
    struct ps_compiled_shader *new_array;

    /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
     * so a linear search is more performant than a hashmap
     */
    for(i = 0; i < shader->num_gl_shaders; i++) {
        if(memcmp(&shader->gl_shaders[i].args, args, sizeof(*args)) == 0) {
            return shader->gl_shaders[i].prgId;
        }
    }

    TRACE("No matching GL shader found, compiling a new shader\n");

    /* Grow the cache by one entry. The result goes into a temporary first: if the
     * reallocation fails the old block is still valid, and shader->gl_shaders must
     * keep pointing at it so that it stays consistent with num_gl_shaders and the
     * already compiled shaders can still be found and destroyed later.
     */
    if(shader->gl_shaders) {
        new_array = HeapReAlloc(GetProcessHeap(), 0, shader->gl_shaders,
                                (shader->num_gl_shaders + 1) * sizeof(*shader->gl_shaders));
    } else {
        new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*shader->gl_shaders));
    }

    if(!new_array) {
        ERR("Out of memory\n");
        return 0;
    }
    shader->gl_shaders = new_array;

    /* Record the args first, then compile; the entry only becomes visible to the
     * lookup loop once num_gl_shaders is incremented in the return statement.
     */
    shader->gl_shaders[shader->num_gl_shaders].args = *args;
    shader->gl_shaders[shader->num_gl_shaders].prgId = pixelshader_compile(shader, args);
    return shader->gl_shaders[shader->num_gl_shaders++].prgId;
}
......@@ -350,7 +350,7 @@ typedef struct {
HRESULT (*shader_alloc_private)(IWineD3DDevice *iface);
void (*shader_free_private)(IWineD3DDevice *iface);
BOOL (*shader_dirtifyable_constants)(IWineD3DDevice *iface);
void (*shader_generate_pshader)(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer);
GLuint (*shader_generate_pshader)(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer);
void (*shader_generate_vshader)(IWineD3DVertexShader *iface, SHADER_BUFFER *buffer);
void (*shader_get_caps)(WINED3DDEVTYPE devtype, WineD3D_GL_Info *gl_info, struct shader_caps *caps);
BOOL (*shader_conv_supported)(WINED3DFORMAT conv);
......@@ -2346,6 +2346,13 @@ struct ps_compile_args {
BOOL srgb_correction;
WINED3DFORMAT format_conversion[MAX_FRAGMENT_SAMPLERS];
enum vertexprocessing_mode vp_mode;
/* Projected textures(ps 1.0-1.3) */
/* Texture types(2D, Cube, 3D) in ps 1.x */
};
/* One GL shader compiled from a D3D pixel shader, together with the compile
 * arguments it was built for. find_gl_pshader keeps an array of these per
 * pixel shader.
 */
struct ps_compiled_shader {
/* Compile arguments this GL shader was generated with; find_gl_pshader compares them with memcmp */
struct ps_compile_args args;
/* GL shader id as returned by pixelshader_compile */
GLuint prgId;
};
typedef struct IWineD3DPixelShaderImpl {
......@@ -2365,25 +2372,19 @@ typedef struct IWineD3DPixelShaderImpl {
int declared_in_count;
/* The GL shader */
GLuint prgId;
struct ps_compiled_shader *gl_shaders;
UINT num_gl_shaders;
/* Some information about the shader behavior */
struct stb_const_desc bumpenvmatconst[MAX_TEXTURES];
char numbumpenvmatconsts;
struct stb_const_desc luminanceconst[MAX_TEXTURES];
char srgb_enabled;
char srgb_mode_hardcoded;
UINT srgb_low_const;
UINT srgb_cmp_const;
char vpos_uniform;
BOOL render_offscreen;
UINT height;
enum vertexprocessing_mode vertexprocessing;
} IWineD3DPixelShaderImpl;
extern const SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[];
extern const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl;
HRESULT pixelshader_compile(IWineD3DPixelShader *iface, struct ps_compile_args *args);
GLuint find_gl_pshader(IWineD3DPixelShaderImpl *shader, struct ps_compile_args *args);
void find_ps_compile_args(IWineD3DPixelShaderImpl *shader, IWineD3DStateBlockImpl *stateblock, struct ps_compile_args *args);
/* sRGB correction constants */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment