Commit 0bf32b12 authored by Stefan Dösinger, committed by Alexandre Julliard

wined3d: Add the ability to duplicate GL pixel shaders.

Some stateblock parameters have to be compiled into the GL pixel shader code, such as the code lines for pixel format fixups. This leads to problems when applications switch those settings, because every switch requires a recompilation of the shader. This patch enables wined3d to keep multiple GL shaders for one D3D shader (pixel shaders only so far) to handle this more efficiently.
parent aed9305c
...@@ -1101,8 +1101,9 @@ static void shader_none_destroy(IWineD3DBaseShader *iface) {} ...@@ -1101,8 +1101,9 @@ static void shader_none_destroy(IWineD3DBaseShader *iface) {}
static HRESULT shader_none_alloc(IWineD3DDevice *iface) {return WINED3D_OK;} static HRESULT shader_none_alloc(IWineD3DDevice *iface) {return WINED3D_OK;}
static void shader_none_free(IWineD3DDevice *iface) {} static void shader_none_free(IWineD3DDevice *iface) {}
static BOOL shader_none_dirty_const(IWineD3DDevice *iface) {return FALSE;} static BOOL shader_none_dirty_const(IWineD3DDevice *iface) {return FALSE;}
static void shader_none_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer) { static GLuint shader_none_generate_pshader(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer) {
FIXME("NONE shader backend asked to generate a pixel shader\n"); FIXME("NONE shader backend asked to generate a pixel shader\n");
return 0;
} }
static void shader_none_generate_vshader(IWineD3DVertexShader *iface, SHADER_BUFFER *buffer) { static void shader_none_generate_vshader(IWineD3DVertexShader *iface, SHADER_BUFFER *buffer) {
FIXME("NONE shader backend asked to generate a vertex shader\n"); FIXME("NONE shader backend asked to generate a vertex shader\n");
......
...@@ -272,32 +272,16 @@ static void pshader_set_limits( ...@@ -272,32 +272,16 @@ static void pshader_set_limits(
/** Generate a pixel shader string using either GL_FRAGMENT_PROGRAM_ARB /** Generate a pixel shader string using either GL_FRAGMENT_PROGRAM_ARB
or GLSL and send it to the card */ or GLSL and send it to the card */
static inline VOID IWineD3DPixelShaderImpl_GenerateShader( static inline GLuint IWineD3DPixelShaderImpl_GenerateShader(
IWineD3DPixelShader *iface) { IWineD3DPixelShaderImpl *This) {
IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
SHADER_BUFFER buffer; SHADER_BUFFER buffer;
#if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
This->fixupVertexBufferSize = PGMSIZE;
This->fixupVertexBuffer[0] = 0;
}
buffer.buffer = This->device->fixupVertexBuffer;
#else
buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
#endif
buffer.bsize = 0; buffer.bsize = 0;
buffer.lineNo = 0; buffer.lineNo = 0;
buffer.newline = TRUE; buffer.newline = TRUE;
((IWineD3DDeviceImpl *)This->baseShader.device)->shader_backend->shader_generate_pshader(iface, &buffer); return ((IWineD3DDeviceImpl *)This->baseShader.device)->shader_backend->shader_generate_pshader((IWineD3DPixelShader *) This, &buffer);
#if 1 /* if were using the data buffer of device then we don't need to free it */
HeapFree(GetProcessHeap(), 0, buffer.buffer);
#endif
} }
static HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) { static HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
...@@ -384,89 +368,29 @@ static HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *i ...@@ -384,89 +368,29 @@ static HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *i
return WINED3D_OK; return WINED3D_OK;
} }
HRESULT pixelshader_compile(IWineD3DPixelShader *iface, struct ps_compile_args *args) { GLuint pixelshader_compile(IWineD3DPixelShaderImpl *This, struct ps_compile_args *args) {
IWineD3DPixelShaderImpl *This =(IWineD3DPixelShaderImpl *)iface;
IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
CONST DWORD *function = This->baseShader.function; CONST DWORD *function = This->baseShader.function;
UINT i, sampler;
HRESULT hr; HRESULT hr;
GLuint retval;
TRACE("(%p) : function %p\n", iface, function); TRACE("(%p) : function %p\n", This, function);
/* We're already compiled, but check if any of the hardcoded stateblock assumptions
* changed.
*/
if (This->baseShader.is_compiled) {
for(i = 0; i < This->baseShader.num_sampled_samplers; i++) {
sampler = This->baseShader.sampled_samplers[i];
if(args->format_conversion[sampler] != This->baseShader.sampled_format[sampler]) {
WARN("Recompiling shader %p due to format change on sampler %d\n", This, sampler);
WARN("Old format group %s, new is %s\n",
debug_d3dformat(This->baseShader.sampled_format[sampler]),
debug_d3dformat(args->format_conversion[sampler]));
goto recompile;
}
}
/* TODO: Check projected textures */
/* TODO: Check texture types(2D, Cube, 3D) */
if(args->srgb_correction != This->srgb_enabled && This->srgb_mode_hardcoded) {
WARN("Recompiling shader because srgb correction is different and hardcoded\n");
goto recompile;
}
if(This->baseShader.reg_maps.vpos && !This->vpos_uniform) {
if(This->render_offscreen != deviceImpl->render_offscreen ||
This->height != ((IWineD3DSurfaceImpl *) deviceImpl->render_targets[0])->currentDesc.Height) {
WARN("Recompiling shader because vpos is used, hard compiled and changed\n");
goto recompile;
}
}
if(This->baseShader.reg_maps.usesdsy && !This->vpos_uniform) {
if(This->render_offscreen ? 0 : 1 != deviceImpl->render_offscreen ? 0 : 1) {
WARN("Recompiling shader because dsy is used, hard compiled and render_offscreen changed\n");
goto recompile;
}
}
if(This->baseShader.hex_version >= WINED3DPS_VERSION(3,0)) {
if(args->vp_mode != This->vertexprocessing) {
WARN("Recompiling shader because the vertex processing mode changed\n");
goto recompile;
}
}
return WINED3D_OK;
recompile: hr = IWineD3DPixelShader_UpdateSamplers((IWineD3DPixelShader *) This);
if(This->baseShader.recompile_count > 50) { if(FAILED(hr)) {
FIXME("Shader %p recompiled more than 50 times\n", This); ERR("Failed to update sampler information\n");
} else { return 0;
This->baseShader.recompile_count++;
}
deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
}
/* We don't need to compile */
if (!function) {
This->baseShader.is_compiled = TRUE;
return WINED3D_OK;
} }
hr = IWineD3DPixelShader_UpdateSamplers(iface);
if(FAILED(hr)) return hr;
/* Reset fields tracking stateblock values being hardcoded in the shader */ /* Reset fields tracking stateblock values being hardcoded in the shader */
This->baseShader.num_sampled_samplers = 0; This->baseShader.num_sampled_samplers = 0;
/* Generate the HW shader */ /* Generate the HW shader */
TRACE("(%p) : Generating hardware program\n", This); TRACE("(%p) : Generating hardware program\n", This);
IWineD3DPixelShaderImpl_GenerateShader(iface); retval = IWineD3DPixelShaderImpl_GenerateShader(This);
This->baseShader.is_compiled = TRUE; This->baseShader.is_compiled = TRUE;
return WINED3D_OK; return retval;
} }
static HRESULT WINAPI IWineD3DPixelShaderImpl_UpdateSamplers(IWineD3DPixelShader *iface) { static HRESULT WINAPI IWineD3DPixelShaderImpl_UpdateSamplers(IWineD3DPixelShader *iface) {
...@@ -532,3 +456,35 @@ void find_ps_compile_args(IWineD3DPixelShaderImpl *shader, IWineD3DStateBlockImp ...@@ -532,3 +456,35 @@ void find_ps_compile_args(IWineD3DPixelShaderImpl *shader, IWineD3DStateBlockImp
args->vp_mode = vertexshader; args->vp_mode = vertexshader;
} }
} }
/**
 * Return the GL pixel shader that matches the given compile arguments,
 * compiling and caching a new one if no cached entry matches.
 *
 * shader: D3D pixel shader owning the gl_shaders cache (an array of
 *         num_gl_shaders entries that is grown by one on a cache miss).
 * args:   compile arguments to look up; cached entries are matched with a
 *         bytewise memcmp() against the stored copy.
 *
 * Returns the prgId of the matching or newly compiled GL shader. Returns 0
 * if the cache array could not be grown; in that case the existing cache is
 * left untouched. The value returned by pixelshader_compile() is stored and
 * returned as-is, including 0.
 */
GLuint find_gl_pshader(IWineD3DPixelShaderImpl *shader, struct ps_compile_args *args) {
    UINT i;
    struct ps_compiled_shader *new_array;
    struct ps_compiled_shader *entry;

    /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
     * so a linear search is more performant than a hashmap
     */
    for(i = 0; i < shader->num_gl_shaders; i++) {
        if(memcmp(&shader->gl_shaders[i].args, args, sizeof(*args)) == 0) {
            return shader->gl_shaders[i].prgId;
        }
    }

    TRACE("No matching GL shader found, compiling a new shader\n");

    /* Grow into a temporary pointer. If HeapReAlloc fails the old block is
     * still valid, so shader->gl_shaders must not be overwritten with NULL:
     * that would leak the array and leave num_gl_shaders pointing at nothing.
     */
    if(shader->gl_shaders) {
        new_array = HeapReAlloc(GetProcessHeap(), 0, shader->gl_shaders,
                                (shader->num_gl_shaders + 1) * sizeof(*shader->gl_shaders));
    } else {
        new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*shader->gl_shaders));
    }

    if(!new_array) {
        ERR("Out of memory\n");
        return 0;
    }
    shader->gl_shaders = new_array;

    entry = &shader->gl_shaders[shader->num_gl_shaders];
    /* Store the arguments bytewise, because the lookup above compares them
     * bytewise; a struct assignment is not required to copy padding bytes.
     */
    memcpy(&entry->args, args, sizeof(*args));
    entry->prgId = pixelshader_compile(shader, args);
    shader->num_gl_shaders++;

    return entry->prgId;
}
...@@ -350,7 +350,7 @@ typedef struct { ...@@ -350,7 +350,7 @@ typedef struct {
HRESULT (*shader_alloc_private)(IWineD3DDevice *iface); HRESULT (*shader_alloc_private)(IWineD3DDevice *iface);
void (*shader_free_private)(IWineD3DDevice *iface); void (*shader_free_private)(IWineD3DDevice *iface);
BOOL (*shader_dirtifyable_constants)(IWineD3DDevice *iface); BOOL (*shader_dirtifyable_constants)(IWineD3DDevice *iface);
void (*shader_generate_pshader)(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer); GLuint (*shader_generate_pshader)(IWineD3DPixelShader *iface, SHADER_BUFFER *buffer);
void (*shader_generate_vshader)(IWineD3DVertexShader *iface, SHADER_BUFFER *buffer); void (*shader_generate_vshader)(IWineD3DVertexShader *iface, SHADER_BUFFER *buffer);
void (*shader_get_caps)(WINED3DDEVTYPE devtype, WineD3D_GL_Info *gl_info, struct shader_caps *caps); void (*shader_get_caps)(WINED3DDEVTYPE devtype, WineD3D_GL_Info *gl_info, struct shader_caps *caps);
BOOL (*shader_conv_supported)(WINED3DFORMAT conv); BOOL (*shader_conv_supported)(WINED3DFORMAT conv);
...@@ -2346,6 +2346,13 @@ struct ps_compile_args { ...@@ -2346,6 +2346,13 @@ struct ps_compile_args {
BOOL srgb_correction; BOOL srgb_correction;
WINED3DFORMAT format_conversion[MAX_FRAGMENT_SAMPLERS]; WINED3DFORMAT format_conversion[MAX_FRAGMENT_SAMPLERS];
enum vertexprocessing_mode vp_mode; enum vertexprocessing_mode vp_mode;
/* Projected textures(ps 1.0-1.3) */
/* Texture types(2D, Cube, 3D) in ps 1.x */
};
struct ps_compiled_shader {
struct ps_compile_args args;
GLuint prgId;
}; };
typedef struct IWineD3DPixelShaderImpl { typedef struct IWineD3DPixelShaderImpl {
...@@ -2365,25 +2372,19 @@ typedef struct IWineD3DPixelShaderImpl { ...@@ -2365,25 +2372,19 @@ typedef struct IWineD3DPixelShaderImpl {
int declared_in_count; int declared_in_count;
/* The GL shader */ /* The GL shader */
GLuint prgId; struct ps_compiled_shader *gl_shaders;
UINT num_gl_shaders;
/* Some information about the shader behavior */ /* Some information about the shader behavior */
struct stb_const_desc bumpenvmatconst[MAX_TEXTURES]; struct stb_const_desc bumpenvmatconst[MAX_TEXTURES];
char numbumpenvmatconsts; char numbumpenvmatconsts;
struct stb_const_desc luminanceconst[MAX_TEXTURES]; struct stb_const_desc luminanceconst[MAX_TEXTURES];
char srgb_enabled;
char srgb_mode_hardcoded;
UINT srgb_low_const;
UINT srgb_cmp_const;
char vpos_uniform; char vpos_uniform;
BOOL render_offscreen;
UINT height;
enum vertexprocessing_mode vertexprocessing;
} IWineD3DPixelShaderImpl; } IWineD3DPixelShaderImpl;
extern const SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[]; extern const SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[];
extern const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl; extern const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl;
HRESULT pixelshader_compile(IWineD3DPixelShader *iface, struct ps_compile_args *args); GLuint find_gl_pshader(IWineD3DPixelShaderImpl *shader, struct ps_compile_args *args);
void find_ps_compile_args(IWineD3DPixelShaderImpl *shader, IWineD3DStateBlockImpl *stateblock, struct ps_compile_args *args); void find_ps_compile_args(IWineD3DPixelShaderImpl *shader, IWineD3DStateBlockImpl *stateblock, struct ps_compile_args *args);
/* sRGB correction constants */ /* sRGB correction constants */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment