Commit d8e219be, authored by Stefan Dösinger, committed by Alexandre Julliard

wined3d: Avoid the TMP_COLOR mov in some cases.

Many 2.0 and 3.0 pixel shaders end with a "mov oC0, rx". If sRGB writing is enabled, the ARB backend writes to a TMP_COLOR temporary and, at the end of the shader, writes the sRGB-corrected color to result.color. If oC0 is not partially rewritten after the mov, we can skip the mov, not declare TMP_COLOR at all, and use the rx register directly as the input for the sRGB correction code. This saves a temporary and an instruction.
parent da7176be
......@@ -1130,12 +1130,13 @@ static void shader_hw_nop(const struct wined3d_shader_instruction *ins)
static void shader_hw_mov(const struct wined3d_shader_instruction *ins)
{
IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
BOOL pshader = shader_is_pshader_version(shader->baseShader.reg_maps.shader_version.type);
struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
SHADER_BUFFER *buffer = ins->ctx->buffer;
char src0_param[256];
if(ins->handler_idx == WINED3DSIH_MOVA) {
struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
struct wined3d_shader_src_param tmp_src = ins->src[0];
char write_mask[6];
......@@ -1188,6 +1189,16 @@ static void shader_hw_mov(const struct wined3d_shader_instruction *ins)
shader_addline(buffer, "ARL A0.x, %s;\n", src0_param);
}
}
else if(ins->dst[0].reg.type == WINED3DSPR_COLOROUT && ins->dst[0].reg.idx == 0 && pshader)
{
IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) shader;
if(ctx->cur_ps_args->super.srgb_correction && ps->color0_mov)
{
shader_addline(buffer, "#mov handled in srgb write code\n");
return;
}
shader_hw_map2gl(ins);
}
else
{
shader_hw_map2gl(ins);
......@@ -2022,7 +2033,7 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This,
const WineD3D_GL_Info *gl_info = &((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info;
const local_constant *lconst;
GLuint retval;
const char *fragcolor;
char fragcolor[16];
DWORD *lconst_map = local_const_mapping((IWineD3DBaseShaderImpl *) This);
struct shader_arb_ctx_priv priv_ctx;
......@@ -2065,13 +2076,17 @@ static GLuint shader_arb_generate_pshader(IWineD3DPixelShaderImpl *This,
if (reg_maps->shader_version.major < 2)
{
fragcolor = "R0";
strcpy(fragcolor, "R0");
} else {
if(args->super.srgb_correction) {
shader_addline(buffer, "TEMP TMP_COLOR;\n");
fragcolor = "TMP_COLOR";
if(This->color0_mov) {
sprintf(fragcolor, "R%u", This->color0_reg);
} else {
shader_addline(buffer, "TEMP TMP_COLOR;\n");
strcpy(fragcolor, "TMP_COLOR");
}
} else {
fragcolor = "result.color";
strcpy(fragcolor, "result.color");
}
}
......
......@@ -588,6 +588,7 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3
else
{
int i, limit;
BOOL color0_mov = FALSE;
/* This will loop over all the registers and try to
* make a bitmask of the ones we're interested in.
......@@ -612,6 +613,11 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3
}
else
{
if(pshader && dst_param.reg.type == WINED3DSPR_COLOROUT && dst_param.reg.idx == 0)
{
IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) This;
ps->color0_mov = FALSE;
}
shader_record_register_usage(This, reg_maps, &dst_param.reg, pshader);
}
......@@ -651,6 +657,22 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3
{
reg_maps->bumpmat[dst_param.reg.idx] = TRUE;
}
else if(pshader && ins.handler_idx == WINED3DSIH_MOV)
{
/* Many 2.0 and 3.0 pixel shaders end with a MOV from a temp register to
* COLOROUT 0. If we know this in advance, the ARB shader backend can skip
* the mov and perform the sRGB write correction from the source register.
*
* However, if the mov is only partial, we can't do this, and if the write
* comes from an instruction other than MOV it is hard to do as well. If
* COLOROUT 0 is overwritten partially later, the marker is dropped again
*/
if(dst_param.reg.type == WINED3DSPR_COLOROUT && dst_param.reg.idx == 0)
{
/* Used later when the source register is read */
color0_mov = TRUE;
}
}
}
if (ins.handler_idx == WINED3DSIH_NRM)
......@@ -686,6 +708,17 @@ HRESULT shader_get_registers_used(IWineD3DBaseShader *iface, const struct wined3
shader_record_register_usage(This, reg_maps, &src_param.reg, pshader);
--count;
}
if(color0_mov)
{
IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) This;
if(src_param.reg.type == WINED3DSPR_TEMP &&
src_param.swizzle == WINED3DSP_NOSWIZZLE)
{
ps->color0_mov = TRUE;
ps->color0_reg = src_param.reg.idx;
}
}
}
}
}
......
......@@ -2711,6 +2711,10 @@ typedef struct IWineD3DPixelShaderImpl {
unsigned char numbumpenvmatconsts;
struct stb_const_desc luminanceconst[MAX_TEXTURES];
char vpos_uniform;
BOOL color0_mov;
DWORD color0_reg;
} IWineD3DPixelShaderImpl;
extern const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment