Commit daf2290e, authored by Stefan Dösinger, committed by Alexandre Julliard

wined3d: Implement the nrm instruction in arb.

parent 1407bdb2
...@@ -1477,6 +1477,28 @@ void vshader_hw_rsq_rcp(SHADER_OPCODE_ARG* arg) { ...@@ -1477,6 +1477,28 @@ void vshader_hw_rsq_rcp(SHADER_OPCODE_ARG* arg) {
shader_addline(buffer, "%s;\n", tmpLine); shader_addline(buffer, "%s;\n", tmpLine);
} }
/* Emit ARB program text for the nrm (normalize) instruction.
 *
 * The generated sequence is:
 *   DP3 TMP, src, src;        squared length of src.xyz
 *   RSQ TMP, TMP.x;           reciprocal square root of that length
 *   MUL[_SAT] dst, src, TMP;  scale the source by the reciprocal length
 *
 * The code relies on a temporary named TMP being available in the
 * generated program (presumably declared by the shader generator when the
 * shader uses nrm - confirm against the callers).
 */
void shader_hw_nrm(SHADER_OPCODE_ARG* arg) {
    SHADER_BUFFER* out = arg->buffer;
    char reg_dst[50];
    char reg_src[50];
    char mask_dst[20];
    const DWORD dst_shift = (arg->dst & WINED3DSP_DSTSHIFT_MASK) >> WINED3DSP_DSTSHIFT_SHIFT;
    const char *sat_suffix = "";

    /* Select the saturating form of MUL when the destination asks for it */
    if ((arg->dst & WINED3DSP_DSTMOD_MASK) & WINED3DSPDM_SATURATE) {
        sat_suffix = "_SAT";
    }

    /* Resolve the textual operands; the source helper also receives the
     * output buffer, so it is called before any instruction is emitted here. */
    pshader_get_register_name(arg->dst, reg_dst);
    shader_arb_get_write_mask(arg, arg->dst, mask_dst);
    pshader_gen_input_modifier_line(out, arg->src[0], 0, reg_src);

    shader_addline(out, "DP3 TMP, %s, %s;\n", reg_src, reg_src);
    shader_addline(out, "RSQ TMP, TMP.x;\n");

    /* According to MSDN the w component is scaled as well:
     * dst.w = src[0].w * 1 / (src.x^2 + src.y^2 + src.z^2)^(1/2) */
    shader_addline(out, "MUL%s %s%s, %s, TMP;\n", sat_suffix, reg_dst, mask_dst, reg_src);

    /* A non-zero destination shift needs an additional output modifier line */
    if (dst_shift != 0) {
        pshader_gen_output_modifier_line(out, FALSE, mask_dst, dst_shift, reg_dst);
    }
}
/* TODO: merge with pixel shader */ /* TODO: merge with pixel shader */
/* Map the opcode 1-to-1 to the GL code */ /* Map the opcode 1-to-1 to the GL code */
void vshader_hw_map2gl(SHADER_OPCODE_ARG* arg) { void vshader_hw_map2gl(SHADER_OPCODE_ARG* arg) {
......
...@@ -381,6 +381,9 @@ HRESULT shader_get_registers_used( ...@@ -381,6 +381,9 @@ HRESULT shader_get_registers_used(
} }
} }
} }
if(WINED3DSIO_NRM == curOpcode->opcode) {
reg_maps->usesnrm = 1;
}
/* This will loop over all the registers and try to /* This will loop over all the registers and try to
* make a bitmask of the ones we're interested in. * make a bitmask of the ones we're interested in.
......
...@@ -177,16 +177,7 @@ CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[] = { ...@@ -177,16 +177,7 @@ CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[] = {
{WINED3DSIO_CMP, "cmp", NULL, 1, 4, pshader_hw_cmp, shader_glsl_cmp, WINED3DPS_VERSION(1,2), WINED3DPS_VERSION(3,0)}, {WINED3DSIO_CMP, "cmp", NULL, 1, 4, pshader_hw_cmp, shader_glsl_cmp, WINED3DPS_VERSION(1,2), WINED3DPS_VERSION(3,0)},
{WINED3DSIO_POW, "pow", "POW", 1, 3, pshader_hw_map2gl, shader_glsl_pow, 0, 0}, {WINED3DSIO_POW, "pow", "POW", 1, 3, pshader_hw_map2gl, shader_glsl_pow, 0, 0},
{WINED3DSIO_CRS, "crs", "XPD", 1, 3, pshader_hw_map2gl, shader_glsl_cross, 0, 0}, {WINED3DSIO_CRS, "crs", "XPD", 1, 3, pshader_hw_map2gl, shader_glsl_cross, 0, 0},
/* TODO: xyz normalise can be performed as VS_ARB using one temporary register, {WINED3DSIO_NRM, "nrm", NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
DP3 tmp , vec, vec;
RSQ tmp, tmp.x;
MUL vec.xyz, vec, tmp;
but I think this is better because it accounts for w properly.
DP3 tmp , vec, vec;
RSQ tmp, tmp.x;
MUL vec, vec, tmp;
*/
{WINED3DSIO_NRM, "nrm", NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0},
{WINED3DSIO_SINCOS, "sincos", NULL, 1, 4, NULL, shader_glsl_sincos, WINED3DPS_VERSION(2,0), WINED3DPS_VERSION(2,1)}, {WINED3DSIO_SINCOS, "sincos", NULL, 1, 4, NULL, shader_glsl_sincos, WINED3DPS_VERSION(2,0), WINED3DPS_VERSION(2,1)},
{WINED3DSIO_SINCOS, "sincos", NULL, 1, 2, NULL, shader_glsl_sincos, WINED3DPS_VERSION(3,0), -1}, {WINED3DSIO_SINCOS, "sincos", NULL, 1, 2, NULL, shader_glsl_sincos, WINED3DPS_VERSION(3,0), -1},
{WINED3DSIO_DP2ADD, "dp2add", NULL, 1, 4, pshader_hw_dp2add, pshader_glsl_dp2add, WINED3DPS_VERSION(2,0), -1}, {WINED3DSIO_DP2ADD, "dp2add", NULL, 1, 4, pshader_hw_dp2add, pshader_glsl_dp2add, WINED3DPS_VERSION(2,0), -1},
......
...@@ -114,16 +114,7 @@ CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = { ...@@ -114,16 +114,7 @@ CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
RCP tmp, vec RCP tmp, vec
MUL out, tmp, vec*/ MUL out, tmp, vec*/
{WINED3DSIO_SGN, "sgn", NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0}, {WINED3DSIO_SGN, "sgn", NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0},
/* TODO: xyz normalise can be performed as VS_ARB using one temporary register, {WINED3DSIO_NRM, "nrm", NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
DP3 tmp , vec, vec;
RSQ tmp, tmp.x;
MUL vec.xyz, vec, tmp;
but I think this is better because it accounts for w properly.
DP3 tmp , vec, vec;
RSQ tmp, tmp.x;
MUL vec, vec, tmp;
*/
{WINED3DSIO_NRM, "nrm", NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0},
{WINED3DSIO_SINCOS, "sincos", NULL, 1, 4, NULL, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)}, {WINED3DSIO_SINCOS, "sincos", NULL, 1, 4, NULL, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
{WINED3DSIO_SINCOS, "sincos", NULL, 1, 2, NULL, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1}, {WINED3DSIO_SINCOS, "sincos", NULL, 1, 2, NULL, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
/* Matrix */ /* Matrix */
...@@ -382,6 +373,11 @@ static VOID IWineD3DVertexShaderImpl_GenerateShader( ...@@ -382,6 +373,11 @@ static VOID IWineD3DVertexShaderImpl_GenerateShader(
This->baseShader.limits.constant_float = This->baseShader.limits.constant_float =
min(95, This->baseShader.limits.constant_float); min(95, This->baseShader.limits.constant_float);
/* Some instructions need a temporary register. Add it if needed, but only if it is really needed */
if(reg_maps->usesnrm) {
shader_addline(&buffer, "TEMP TMP;\n");
}
/* Base Declarations */ /* Base Declarations */
shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION); shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
......
...@@ -1652,6 +1652,7 @@ typedef struct shader_reg_maps { ...@@ -1652,6 +1652,7 @@ typedef struct shader_reg_maps {
* Use 0 as default (bit 31 is always 1 on a valid token) */ * Use 0 as default (bit 31 is always 1 on a valid token) */
DWORD samplers[max(MAX_FRAGMENT_SAMPLERS, MAX_VERTEX_SAMPLERS)]; DWORD samplers[max(MAX_FRAGMENT_SAMPLERS, MAX_VERTEX_SAMPLERS)];
char bumpmat, luminanceparams; char bumpmat, luminanceparams;
char usesnrm;
/* Whether or not a loop is used in this shader */ /* Whether or not a loop is used in this shader */
char loop; char loop;
...@@ -1788,6 +1789,9 @@ extern void pshader_hw_texm3x3(SHADER_OPCODE_ARG* arg); ...@@ -1788,6 +1789,9 @@ extern void pshader_hw_texm3x3(SHADER_OPCODE_ARG* arg);
extern void pshader_hw_texm3x2depth(SHADER_OPCODE_ARG* arg); extern void pshader_hw_texm3x2depth(SHADER_OPCODE_ARG* arg);
extern void pshader_hw_dp2add(SHADER_OPCODE_ARG* arg); extern void pshader_hw_dp2add(SHADER_OPCODE_ARG* arg);
/* ARB vertex / pixel shader common prototypes */
extern void shader_hw_nrm(SHADER_OPCODE_ARG* arg);
/* ARB vertex shader prototypes */ /* ARB vertex shader prototypes */
extern void vshader_hw_map2gl(SHADER_OPCODE_ARG* arg); extern void vshader_hw_map2gl(SHADER_OPCODE_ARG* arg);
extern void vshader_hw_mnxn(SHADER_OPCODE_ARG* arg); extern void vshader_hw_mnxn(SHADER_OPCODE_ARG* arg);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment