Commit ad217029, authored by Stefan Dösinger and committed by Alexandre Julliard

wined3d: Get rid of TMP accesses in texm3x3* instructions.

parent ced325f8
......@@ -1294,9 +1294,16 @@ static void pshader_hw_texm3x3pad(const struct wined3d_shader_instruction *ins)
SHADER_BUFFER *buffer = ins->ctx->buffer;
SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
char src0_name[50];
unsigned int dst;
/* There are always 2 texm3x3pad instructions followed by one texm3x3[tex,vspec, ...] instruction, with
* incrementing ins->dst[0].register_idx numbers. So the pad instruction already knows the final destination
* register, and this register is uninitialized (otherwise the assembler complains that it is 'redeclared')
*/
dst = reg + 2 - current_state->current_row;
shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
shader_addline(buffer, "DP3 TMP.%c, fragment.texcoord[%u], %s;\n", 'x' + current_state->current_row, reg, src0_name);
shader_addline(buffer, "DP3 T%u.%c, fragment.texcoord[%u], %s;\n", dst, 'x' + current_state->current_row, reg, src0_name);
current_state->texcoord_w[current_state->current_row++] = reg;
}
......@@ -1309,15 +1316,17 @@ static void pshader_hw_texm3x3tex(const struct wined3d_shader_instruction *ins)
SHADER_BUFFER *buffer = ins->ctx->buffer;
SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
char dst_str[50];
char dst_reg[8];
char src0_name[50];
sprintf(dst_reg, "T%u", reg);
shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
shader_addline(buffer, "DP3 TMP.z, fragment.texcoord[%u], %s;\n", reg, src0_name);
shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
/* Sample the texture using the calculated coordinates */
shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
shader_hw_sample(ins, reg, dst_str, "TMP", flags & WINED3DTTFF_PROJECTED, FALSE);
shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED, FALSE);
current_state->current_row = 0;
}
......@@ -1331,10 +1340,14 @@ static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins
SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
char dst_str[50];
char src0_name[50];
char dst_reg[8];
/* Get the dst reg without writemask strings. We know this register is uninitialized, so we can use all
* components for temporary data storage
*/
sprintf(dst_reg, "T%u", reg);
shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
/* Note: TMP.xy is input here, generated in earlier texm3x3pad instructions */
shader_addline(buffer, "DP3 TMP.z, fragment.texcoord[%u], %s;\n", reg, src0_name);
shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
/* Construct the eye-ray vector from w coordinates */
shader_addline(buffer, "MOV TB.x, fragment.texcoord[%u].w;\n", current_state->texcoord_w[0]);
......@@ -1343,18 +1356,18 @@ static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins
/* Calculate reflection vector
*/
shader_addline(buffer, "DP3 TMP.w, TMP, TB;\n");
shader_addline(buffer, "DP3 %s.w, %s, TB;\n", dst_reg, dst_reg);
/* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */
shader_addline(buffer, "DP3 TB.w, TMP, TMP;\n");
shader_addline(buffer, "DP3 TB.w, %s, %s;\n", dst_reg, dst_reg);
shader_addline(buffer, "RCP TB.w, TB.w;\n");
shader_addline(buffer, "MUL TMP.w, TMP.w, TB.w;\n");
shader_addline(buffer, "MUL TMP, TMP.w, TMP;\n");
shader_addline(buffer, "MAD TMP, coefmul.x, TMP, -TB;\n");
shader_addline(buffer, "MUL %s.w, %s.w, TB.w;\n", dst_reg, dst_reg);
shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
shader_addline(buffer, "MAD %s, coefmul.x, %s, -TB;\n", dst_reg, dst_reg);
/* Sample the texture using the calculated coordinates */
shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
shader_hw_sample(ins, reg, dst_str, "TMP", flags & WINED3DTTFF_PROJECTED, FALSE);
shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED, FALSE);
current_state->current_row = 0;
}
......@@ -1369,11 +1382,13 @@ static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins)
char dst_str[50];
char src0_name[50];
char src1_name[50];
char dst_reg[8];
shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
shader_arb_get_src_param(ins, &ins->src[0], 1, src1_name);
/* Note: TMP.xy is input here, generated by two texm3x3pad instructions */
shader_addline(buffer, "DP3 TMP.z, fragment.texcoord[%u], %s;\n", reg, src0_name);
sprintf(dst_reg, "T%u", reg);
shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
/* Calculate reflection vector.
*
......@@ -1383,17 +1398,17 @@ static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins)
*
* Which normalizes the normal vector
*/
shader_addline(buffer, "DP3 TMP.w, TMP, %s;\n", src1_name);
shader_addline(buffer, "DP3 TC.w, TMP, TMP;\n");
shader_addline(buffer, "DP3 %s.w, %s, %s;\n", dst_reg, dst_reg, src1_name);
shader_addline(buffer, "DP3 TC.w, %s, %s;\n", dst_reg, dst_reg);
shader_addline(buffer, "RCP TC.w, TC.w;\n");
shader_addline(buffer, "MUL TMP.w, TMP.w, TC.w;\n");
shader_addline(buffer, "MUL TMP, TMP.w, TMP;\n");
shader_addline(buffer, "MAD TMP, coefmul.x, TMP, -%s;\n", src1_name);
shader_addline(buffer, "MUL %s.w, %s.w, TC.w;\n", dst_reg, dst_reg);
shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
shader_addline(buffer, "MAD %s, coefmul.x, %s, -%s;\n", dst_reg, dst_reg, src1_name);
/* Sample the texture using the calculated coordinates */
shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
flags = reg < MAX_TEXTURES ? deviceImpl->stateBlock->textureState[reg][WINED3DTSS_TEXTURETRANSFORMFLAGS] : 0;
shader_hw_sample(ins, reg, dst_str, "TMP", flags & WINED3DTTFF_PROJECTED, FALSE);
shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3DTTFF_PROJECTED, FALSE);
current_state->current_row = 0;
}
......@@ -1469,8 +1484,8 @@ static void pshader_hw_texm3x3(const struct wined3d_shader_instruction *ins)
shader_arb_get_dst_param(ins, dst, dst_str);
shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
shader_addline(buffer, "DP3 TMP.z, fragment.texcoord[%u], %s;\n", dst->reg.idx, src0);
shader_addline(buffer, "MOV %s, TMP;\n", dst_str);
shader_addline(buffer, "DP3 T%u.z, fragment.texcoord[%u], %s;\n", dst->reg.idx, dst->reg.idx, src0);
shader_addline(buffer, "MOV %s, T%u;\n", dst_str, dst->reg.idx);
}
/** Process the WINED3DSIO_TEXM3X2DEPTH instruction in ARB:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment