/*
 * GLSL pixel and vertex shader implementation
 *
 * Copyright 2006 Jason Green 
 * Copyright 2006-2007 Henri Verbeet
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

22 23 24 25 26 27 28 29
/*
 * D3D shader asm has swizzles on source parameters, and write masks for
 * destination parameters. GLSL uses swizzles for both. The result of this is
 * that for example "mov dst.xw, src.zyxw" becomes "dst.xw = src.zw" in GLSL.
 * Ie, to generate a proper GLSL source swizzle, we need to take the D3D write
 * mask for the destination parameter into account.
 */

30 31 32 33 34
#include "config.h"
#include <stdio.h>
#include "wined3d_private.h"

WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
35
WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
36 37 38

#define GLINFO_LOCATION      (*gl_info)

39 40 41 42 43
typedef struct {
    char reg_name[50];
    char mask_str[6];
} glsl_dst_param_t;

44 45 46 47 48
/* Source parameter of an instruction, rendered as GLSL text. */
typedef struct {
    /* GLSL name of the source register, without swizzle or modifier. */
    char reg_name[50];
    /* Complete source expression: register name, swizzle and the source
     * modifier (negate, bias, abs, ...) applied to it. */
    char param_str[100];
} glsl_src_param_t;

49 50 51 52 53
/* Description of a GLSL texture sampling function.
 * NOTE(review): not used in the part of the file visible here; the field
 * descriptions below are assumptions based on the names - confirm at the use sites. */
typedef struct {
    /* presumably the name of the GLSL sampling function to call */
    const char *name;
    /* presumably a WINED3DSP_WRITEMASK_* style mask of the coordinate components it reads */
    DWORD coord_mask;
} glsl_sample_function_t;

54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
/**
 * Prints the GLSL info log, which will contain error messages if they exist.
 *
 * gl_info: GL context information (GLINFO_LOCATION expands to *gl_info for
 *          the GL macros used here)
 * obj:     handle of the GLSL object whose info log is queried
 *
 * Nothing is printed when the log is empty or when no memory is available to
 * hold a copy of it.
 */
void print_glsl_info_log(WineD3D_GL_Info *gl_info, GLhandleARB obj) {

    int infologLength = 0;
    char *infoLog;

    GL_EXTCALL(glGetObjectParameterivARB(obj,
               GL_OBJECT_INFO_LOG_LENGTH_ARB,
               &infologLength));

    /* A size of 1 is just a null-terminated string, so the log should be bigger than
     * that if there are errors. */
    if (infologLength <= 1)
        return;

    infoLog = HeapAlloc(GetProcessHeap(), 0, infologLength);
    if (!infoLog) {
        /* Never hand a NULL buffer to the GL. */
        FIXME("Out of memory, cannot read the info log of GLSL shader #%u\n", obj);
        return;
    }

    GL_EXTCALL(glGetInfoLogARB(obj, infologLength, NULL, infoLog));
    FIXME("Error received from GLSL shader #%u: %s\n", obj, debugstr_a(infoLog));
    HeapFree(GetProcessHeap(), 0, infoLog);
}

75 76 77
/**
 * Loads (pixel shader) samplers
 */
78
static void shader_glsl_load_psamplers(
79 80 81 82
    WineD3D_GL_Info *gl_info,
    IWineD3DStateBlock* iface) {

    IWineD3DStateBlockImpl* stateBlock = (IWineD3DStateBlockImpl*) iface;
83
    GLhandleARB programId = stateBlock->glsl_program->programId;
84 85 86 87
    GLhandleARB name_loc;
    int i;
    char sampler_name[20];

88
    for (i=0; i< GL_LIMITS(samplers); ++i) {
89
        if (stateBlock->textures[i] != NULL) {
90
           snprintf(sampler_name, sizeof(sampler_name), "Psampler%d", i);
91 92
           name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
           if (name_loc != -1) {
93
               TRACE("Loading %s for texture %d\n", sampler_name, i);
94 95 96 97 98 99 100 101 102
               GL_EXTCALL(glUniform1iARB(name_loc, i));
               checkGLcall("glUniform1iARB");
           }
        }
    }
}

/** 
 * Loads floating point constants (aka uniforms) into the currently set GLSL program.
103
 * When constant_list == NULL, it will load all the constants.
104
 */
105
static void shader_glsl_load_constantsF(IWineD3DBaseShaderImpl* This, WineD3D_GL_Info *gl_info,
106
        unsigned int max_constants, float* constants, GLhandleARB *constant_locations,
107
        struct list *constant_list) {
108
    constants_entry *constant;
109
    local_constant* lconst;
110
    GLhandleARB tmp_loc;
111 112
    DWORD i, j;
    DWORD *idx;
113

114
    if (TRACE_ON(d3d_shader)) {
115 116 117 118 119 120 121 122 123 124 125
        LIST_FOR_EACH_ENTRY(constant, constant_list, constants_entry, entry) {
            idx = constant->idx;
            j = constant->count;
            while (j--) {
                i = *idx++;
                tmp_loc = constant_locations[i];
                if (tmp_loc != -1) {
                    TRACE_(d3d_constants)("Loading constants %i: %f, %f, %f, %f\n", i,
                            constants[i * 4 + 0], constants[i * 4 + 1],
                            constants[i * 4 + 2], constants[i * 4 + 3]);
                }
126 127
            }
        }
128
    }
129 130 131 132 133 134 135 136 137 138
    LIST_FOR_EACH_ENTRY(constant, constant_list, constants_entry, entry) {
        idx = constant->idx;
        j = constant->count;
        while (j--) {
            i = *idx++;
            tmp_loc = constant_locations[i];
            if (tmp_loc != -1) {
                /* We found this uniform name in the program - go ahead and send the data */
                GL_EXTCALL(glUniform4fvARB(tmp_loc, 1, constants + (i * 4)));
            }
139 140 141
        }
    }
    checkGLcall("glUniform4fvARB()");
142 143

    /* Load immediate constants */
144 145 146 147 148
    if (TRACE_ON(d3d_shader)) {
        LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) {
            tmp_loc = constant_locations[lconst->idx];
            if (tmp_loc != -1) {
                GLfloat* values = (GLfloat*)lconst->value;
149
                TRACE_(d3d_constants)("Loading local constants %i: %f, %f, %f, %f\n", lconst->idx,
150 151 152 153 154 155
                        values[0], values[1], values[2], values[3]);
            }
        }
    }
    LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) {
        tmp_loc = constant_locations[lconst->idx];
156 157
        if (tmp_loc != -1) {
            /* We found this uniform name in the program - go ahead and send the data */
158
            GL_EXTCALL(glUniform4fvARB(tmp_loc, 1, (GLfloat*)lconst->value));
159 160
        }
    }
161
    checkGLcall("glUniform4fvARB()");
162 163
}

164 165 166 167
/** 
 * Loads integer constants (aka uniforms) into the currently set GLSL program.
 * When @constants_set == NULL, it will load all the constants.
 */
168
static void shader_glsl_load_constantsI(
169
    IWineD3DBaseShaderImpl* This,
170 171 172 173
    WineD3D_GL_Info *gl_info,
    GLhandleARB programId,
    unsigned max_constants,
    int* constants,
174
    BOOL* constants_set) {
175 176 177
    
    GLhandleARB tmp_loc;
    int i;
178
    char tmp_name[8];
179
    char is_pshader = shader_is_pshader_version(This->baseShader.hex_version);
180
    const char* prefix = is_pshader? "PI":"VI";
181
    struct list* ptr;
182 183 184 185

    for (i=0; i<max_constants; ++i) {
        if (NULL == constants_set || constants_set[i]) {

186
            TRACE_(d3d_constants)("Loading constants %i: %i, %i, %i, %i\n",
187 188 189 190 191 192 193 194 195 196 197 198 199
                  i, constants[i*4], constants[i*4+1], constants[i*4+2], constants[i*4+3]);

            /* TODO: Benchmark and see if it would be beneficial to store the 
             * locations of the constants to avoid looking up each time */
            snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, i);
            tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
            if (tmp_loc != -1) {
                /* We found this uniform name in the program - go ahead and send the data */
                GL_EXTCALL(glUniform4ivARB(tmp_loc, 1, &constants[i*4]));
                checkGLcall("glUniform4ivARB");
            }
        }
    }
200 201 202 203 204 205 206 207

    /* Load immediate constants */
    ptr = list_head(&This->baseShader.constantsI);
    while (ptr) {
        local_constant* lconst = LIST_ENTRY(ptr, struct local_constant, entry);
        unsigned int idx = lconst->idx;
        GLint* values = (GLint*) lconst->value;

208
        TRACE_(d3d_constants)("Loading local constants %i: %i, %i, %i, %i\n", idx,
209 210 211 212 213 214 215 216 217 218 219
            values[0], values[1], values[2], values[3]);

        snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, idx);
        tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
        if (tmp_loc != -1) {
            /* We found this uniform name in the program - go ahead and send the data */
            GL_EXTCALL(glUniform4ivARB(tmp_loc, 1, values));
            checkGLcall("glUniform4ivARB");
        }
        ptr = list_next(&This->baseShader.constantsI, ptr);
    }
220 221 222 223 224 225
}

/** 
 * Loads boolean constants (aka uniforms) into the currently set GLSL program.
 * When @constants_set == NULL, it will load all the constants.
 */
226
static void shader_glsl_load_constantsB(
227
    IWineD3DBaseShaderImpl* This,
228 229 230 231
    WineD3D_GL_Info *gl_info,
    GLhandleARB programId,
    unsigned max_constants,
    BOOL* constants,
232
    BOOL* constants_set) {
233 234 235
    
    GLhandleARB tmp_loc;
    int i;
236
    char tmp_name[8];
237
    char is_pshader = shader_is_pshader_version(This->baseShader.hex_version);
238
    const char* prefix = is_pshader? "PB":"VB";
239
    struct list* ptr;
240 241 242 243

    for (i=0; i<max_constants; ++i) {
        if (NULL == constants_set || constants_set[i]) {

244
            TRACE_(d3d_constants)("Loading constants %i: %i;\n", i, constants[i*4]);
245 246 247 248 249 250 251 252 253 254 255 256

            /* TODO: Benchmark and see if it would be beneficial to store the 
             * locations of the constants to avoid looking up each time */
            snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, i);
            tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
            if (tmp_loc != -1) {
                /* We found this uniform name in the program - go ahead and send the data */
                GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, &constants[i*4]));
                checkGLcall("glUniform1ivARB");
            }
        }
    }
257 258 259 260 261 262 263 264

    /* Load immediate constants */
    ptr = list_head(&This->baseShader.constantsB);
    while (ptr) {
        local_constant* lconst = LIST_ENTRY(ptr, struct local_constant, entry);
        unsigned int idx = lconst->idx;
        GLint* values = (GLint*) lconst->value;

265
        TRACE_(d3d_constants)("Loading local constants %i: %i\n", idx, values[0]);
266 267 268 269 270 271 272 273 274 275

        snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, idx);
        tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
        if (tmp_loc != -1) {
            /* We found this uniform name in the program - go ahead and send the data */
            GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, values));
            checkGLcall("glUniform1ivARB");
        }
        ptr = list_next(&This->baseShader.constantsB, ptr);
    }
276 277 278 279
}



280 281 282 283
/**
 * Loads the app-supplied constants into the currently set GLSL program.
 */
void shader_glsl_load_constants(
284
    IWineD3DDevice* device,
285 286
    char usePixelShader,
    char useVertexShader) {
287 288 289 290 291
   
    IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) device;
    IWineD3DStateBlockImpl* stateBlock = deviceImpl->stateBlock;
    WineD3D_GL_Info *gl_info = &((IWineD3DImpl*) deviceImpl->wineD3D)->gl_info;

292
    GLhandleARB *constant_locations;
293
    struct list *constant_list;
294
    GLhandleARB programId;
295 296
    GLint pos;

297
    if (!stateBlock->glsl_program) {
298 299 300
        /* No GLSL program set - nothing to do. */
        return;
    }
301
    programId = stateBlock->glsl_program->programId;
302 303

    if (useVertexShader) {
304
        IWineD3DBaseShaderImpl* vshader = (IWineD3DBaseShaderImpl*) stateBlock->vertexShader;
305
        GLint pos;
306

307
        constant_locations = stateBlock->glsl_program->vuniformF_locations;
308
        constant_list = &stateBlock->set_vconstantsF;
309

310
        /* Load DirectX 9 float constants/uniforms for vertex shader */
311
        shader_glsl_load_constantsF(vshader, gl_info, GL_LIMITS(vshader_constantsF),
312
                stateBlock->vertexShaderConstantF, constant_locations, constant_list);
313

314
        /* Load DirectX 9 integer constants/uniforms for vertex shader */
315
        shader_glsl_load_constantsI(vshader, gl_info, programId, MAX_CONST_I,
316
                                    stateBlock->vertexShaderConstantI,
317
                                    stateBlock->set.vertexShaderConstantsI);
318 319

        /* Load DirectX 9 boolean constants/uniforms for vertex shader */
320
        shader_glsl_load_constantsB(vshader, gl_info, programId, MAX_CONST_B,
321
                                    stateBlock->vertexShaderConstantB,
322
                                    stateBlock->set.vertexShaderConstantsB);
323 324 325 326

        /* Upload the position fixup params */
        pos = GL_EXTCALL(glGetUniformLocationARB(programId, "posFixup"));
        checkGLcall("glGetUniformLocationARB");
327
        GL_EXTCALL(glUniform4fvARB(pos, 1, &deviceImpl->posFixup[0]));
328
        checkGLcall("glUniform4fvARB");
329 330 331 332
    }

    if (usePixelShader) {

333 334
        IWineD3DBaseShaderImpl* pshader = (IWineD3DBaseShaderImpl*) stateBlock->pixelShader;

335
        constant_locations = stateBlock->glsl_program->puniformF_locations;
336
        constant_list = &stateBlock->set_pconstantsF;
337

338
        /* Load pixel shader samplers */
339
        shader_glsl_load_psamplers(gl_info, (IWineD3DStateBlock*) stateBlock);
340 341

        /* Load DirectX 9 float constants/uniforms for pixel shader */
342
        shader_glsl_load_constantsF(pshader, gl_info, GL_LIMITS(pshader_constantsF),
343
                stateBlock->pixelShaderConstantF, constant_locations, constant_list);
344

345
        /* Load DirectX 9 integer constants/uniforms for pixel shader */
346
        shader_glsl_load_constantsI(pshader, gl_info, programId, MAX_CONST_I,
347
                                    stateBlock->pixelShaderConstantI, 
348
                                    stateBlock->set.pixelShaderConstantsI);
349

350
        /* Load DirectX 9 boolean constants/uniforms for pixel shader */
351
        shader_glsl_load_constantsB(pshader, gl_info, programId, MAX_CONST_B,
352
                                    stateBlock->pixelShaderConstantB, 
353
                                    stateBlock->set.pixelShaderConstantsB);
354 355 356 357

        /* Upload the environment bump map matrix if needed. The needsbumpmat member specifies the texture stage to load the matrix from.
         * It can't be 0 for a valid texbem instruction.
         */
358
        if(((IWineD3DPixelShaderImpl *) pshader)->needsbumpmat != -1) {
359 360 361
            float *data = (float *) &stateBlock->textureState[(int) ((IWineD3DPixelShaderImpl *) pshader)->needsbumpmat][WINED3DTSS_BUMPENVMAT00];
            pos = GL_EXTCALL(glGetUniformLocationARB(programId, "bumpenvmat"));
            checkGLcall("glGetUniformLocationARB");
362
            GL_EXTCALL(glUniformMatrix2fvARB(pos, 1, 0, data));
363 364
            checkGLcall("glUniform4fvARB");
        }
365 366 367
    }
}

368 369 370 371
/** Generate the variable & register declarations for the GLSL output target */
void shader_generate_glsl_declarations(
    IWineD3DBaseShader *iface,
    shader_reg_maps* reg_maps,
372 373
    SHADER_BUFFER* buffer,
    WineD3D_GL_Info* gl_info) {
374 375 376 377 378 379 380 381

    IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
    int i;

    /* There are some minor differences between pixel and vertex shaders */
    char pshader = shader_is_pshader_version(This->baseShader.hex_version);
    char prefix = pshader ? 'P' : 'V';

382 383 384 385 386 387
    /* Prototype the subroutines */
    for (i = 0; i < This->baseShader.limits.label; i++) {
        if (reg_maps->labels[i])
            shader_addline(buffer, "void subroutine%lu();\n", i);
    }

388
    /* Declare the constants (aka uniforms) */
389 390 391 392 393
    if (This->baseShader.limits.constant_float > 0) {
        unsigned max_constantsF = min(This->baseShader.limits.constant_float, 
                (pshader ? GL_LIMITS(pshader_constantsF) : GL_LIMITS(vshader_constantsF)));
        shader_addline(buffer, "uniform vec4 %cC[%u];\n", prefix, max_constantsF);
    }
394 395 396 397 398 399 400

    if (This->baseShader.limits.constant_int > 0)
        shader_addline(buffer, "uniform ivec4 %cI[%u];\n", prefix, This->baseShader.limits.constant_int);

    if (This->baseShader.limits.constant_bool > 0)
        shader_addline(buffer, "uniform bool %cB[%u];\n", prefix, This->baseShader.limits.constant_bool);

401 402
    if(!pshader)
        shader_addline(buffer, "uniform vec4 posFixup;\n");
403
    else if(reg_maps->bumpmat != -1)
404
        shader_addline(buffer, "uniform mat2 bumpenvmat;\n");
405

406 407 408 409
    /* Declare texture samplers */ 
    for (i = 0; i < This->baseShader.limits.sampler; i++) {
        if (reg_maps->samplers[i]) {

410
            DWORD stype = reg_maps->samplers[i] & WINED3DSP_TEXTURETYPE_MASK;
411 412
            switch (stype) {

413 414 415 416
                case WINED3DSTT_1D:
                    shader_addline(buffer, "uniform sampler1D %csampler%lu;\n", prefix, i);
                    break;
                case WINED3DSTT_2D:
417 418
                    shader_addline(buffer, "uniform sampler2D %csampler%lu;\n", prefix, i);
                    break;
419
                case WINED3DSTT_CUBE:
420 421
                    shader_addline(buffer, "uniform samplerCube %csampler%lu;\n", prefix, i);
                    break;
422
                case WINED3DSTT_VOLUME:
423 424 425 426
                    shader_addline(buffer, "uniform sampler3D %csampler%lu;\n", prefix, i);
                    break;
                default:
                    shader_addline(buffer, "uniform unsupported_sampler %csampler%lu;\n", prefix, i);
427
                    FIXME("Unrecognized sampler type: %#x\n", stype);
428 429 430 431 432 433 434 435
                    break;
            }
        }
    }
    
    /* Declare address variables */
    for (i = 0; i < This->baseShader.limits.address; i++) {
        if (reg_maps->address[i])
436
            shader_addline(buffer, "ivec4 A%d;\n", i);
437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469
    }

    /* Declare texture coordinate temporaries and initialize them */
    for (i = 0; i < This->baseShader.limits.texcoord; i++) {
        if (reg_maps->texcoord[i]) 
            shader_addline(buffer, "vec4 T%lu = gl_TexCoord[%lu];\n", i, i);
    }

    /* Declare input register temporaries */
    for (i=0; i < This->baseShader.limits.packed_input; i++) {
        if (reg_maps->packed_input[i])
            shader_addline(buffer, "vec4 IN%lu;\n", i);
    }

    /* Declare output register temporaries */
    for (i = 0; i < This->baseShader.limits.packed_output; i++) {
        if (reg_maps->packed_output[i])
            shader_addline(buffer, "vec4 OUT%lu;\n", i);
    }

    /* Declare temporary variables */
    for(i = 0; i < This->baseShader.limits.temporary; i++) {
        if (reg_maps->temporary[i])
            shader_addline(buffer, "vec4 R%lu;\n", i);
    }

    /* Declare attributes */
    for (i = 0; i < This->baseShader.limits.attributes; i++) {
        if (reg_maps->attributes[i])
            shader_addline(buffer, "attribute vec4 attrib%i;\n", i);
    }

    /* Declare loop register aL */
470
    if (reg_maps->loop) {
471
        shader_addline(buffer, "int aL;\n");
472 473
        shader_addline(buffer, "int tmpInt;\n");
    }
474 475 476 477 478 479 480 481 482
    
    /* Temporary variables for matrix operations */
    shader_addline(buffer, "vec4 tmp0;\n");
    shader_addline(buffer, "vec4 tmp1;\n");

    /* Start the main program */
    shader_addline(buffer, "void main() {\n");
}

483 484 485 486 487 488 489
/*****************************************************************************
 * Functions to generate GLSL strings from DirectX Shader bytecode begin here.
 *
 * For more information, see http://wiki.winehq.org/DirectX-Shaders
 ****************************************************************************/

/* Prototypes */
490
/* Declared ahead of its definition because shader_glsl_get_register_name()
 * calls it to resolve relative addressing. */
static void shader_glsl_add_src_param(
        SHADER_OPCODE_ARG* arg,
        const DWORD param,
        const DWORD addr_token,
        DWORD mask,
        glsl_src_param_t *src_param);
492

493
/**
 * Result multipliers for the destination shift modifier.
 *
 * Indexed by the shift value of the destination token; each entry is the
 * GLSL text placed in front of the parenthesized result expression. Shift
 * values without an entry map to an empty string, i.e. no scaling.
 */
static const char * const shift_glsl_tab[] = {
    /*  0 (none) */ "",
    /*  1 (x2)   */ "2.0 * ",
    /*  2 (x4)   */ "4.0 * ",
    /*  3 (x8)   */ "8.0 * ",
    /*  4 (x16)  */ "16.0 * ",
    /*  5 (x32)  */ "32.0 * ",
    /*  6 (x64)  */ "",
    /*  7 (x128) */ "",
    /*  8 (d256) */ "",
    /*  9 (d128) */ "",
    /* 10 (d64)  */ "",
    /* 11 (d32)  */ "",
    /* 12 (d16)  */ "0.0625 * ",
    /* 13 (d8)   */ "0.125 * ",
    /* 14 (d4)   */ "0.25 * ",
    /* 15 (d2)   */ "0.5 * "
};

/* Generate a GLSL parameter that does the input modifier computation and return the input register/mask to use */
514
static void shader_glsl_gen_modifier (
515 516 517 518 519 520 521
    const DWORD instr,
    const char *in_reg,
    const char *in_regswizzle,
    char *out_str) {

    out_str[0] = 0;
    
522
    if (instr == WINED3DSIO_TEXKILL)
523 524
        return;

525
    switch (instr & WINED3DSP_SRCMOD_MASK) {
526 527
    case WINED3DSPSM_DZ: /* Need to handle this in the instructions itself (texld & texcrd). */
    case WINED3DSPSM_DW:
528
    case WINED3DSPSM_NONE:
529 530
        sprintf(out_str, "%s%s", in_reg, in_regswizzle);
        break;
531
    case WINED3DSPSM_NEG:
532 533
        sprintf(out_str, "-%s%s", in_reg, in_regswizzle);
        break;
534
    case WINED3DSPSM_NOT:
535 536
        sprintf(out_str, "!%s%s", in_reg, in_regswizzle);
        break;
537
    case WINED3DSPSM_BIAS:
538 539
        sprintf(out_str, "(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
        break;
540
    case WINED3DSPSM_BIASNEG:
541 542
        sprintf(out_str, "-(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
        break;
543
    case WINED3DSPSM_SIGN:
544 545
        sprintf(out_str, "(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
        break;
546
    case WINED3DSPSM_SIGNNEG:
547 548
        sprintf(out_str, "-(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
        break;
549
    case WINED3DSPSM_COMP:
550 551
        sprintf(out_str, "(1.0 - %s%s)", in_reg, in_regswizzle);
        break;
552
    case WINED3DSPSM_X2:
553 554
        sprintf(out_str, "(2.0 * %s%s)", in_reg, in_regswizzle);
        break;
555
    case WINED3DSPSM_X2NEG:
556 557
        sprintf(out_str, "-(2.0 * %s%s)", in_reg, in_regswizzle);
        break;
558
    case WINED3DSPSM_ABS:
559 560
        sprintf(out_str, "abs(%s%s)", in_reg, in_regswizzle);
        break;
561
    case WINED3DSPSM_ABSNEG:
562 563 564
        sprintf(out_str, "-abs(%s%s)", in_reg, in_regswizzle);
        break;
    default:
565
        FIXME("Unhandled modifier %u\n", (instr & WINED3DSP_SRCMOD_MASK));
566 567 568 569 570 571
        sprintf(out_str, "%s%s", in_reg, in_regswizzle);
    }
}

/** Writes the GLSL variable name that corresponds to the register that the
 * DX opcode parameter is trying to access */
572
static void shader_glsl_get_register_name(
573 574 575 576 577 578 579
    const DWORD param,
    const DWORD addr_token,
    char* regstr,
    BOOL* is_color,
    SHADER_OPCODE_ARG* arg) {

    /* oPos, oFog and oPts in D3D */
580
    static const char * const hwrastout_reg_names[] = { "gl_Position", "gl_FogFragCoord", "gl_PointSize" };
581

582
    DWORD reg = param & WINED3DSP_REGNUM_MASK;
583 584
    DWORD regtype = shader_get_regtype(param);
    IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) arg->shader;
585 586 587
    IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
    WineD3D_GL_Info* gl_info = &((IWineD3DImpl*)deviceImpl->wineD3D)->gl_info;

588
    char pshader = shader_is_pshader_version(This->baseShader.hex_version);
589
    char tmpStr[50];
590 591 592

    *is_color = FALSE;   
 
593
    switch (regtype) {
594
    case WINED3DSPR_TEMP:
595
        sprintf(tmpStr, "R%u", reg);
596
    break;
597
    case WINED3DSPR_INPUT:
598
        if (pshader) {
599
            /* Pixel shaders >= 3.0 */
600
            if (WINED3DSHADER_VERSION_MAJOR(This->baseShader.hex_version) >= 3)
601
                sprintf(tmpStr, "IN%u", reg);
602 603 604 605 606
             else {
                if (reg==0)
                    strcpy(tmpStr, "gl_Color");
                else
                    strcpy(tmpStr, "gl_SecondaryColor");
607 608
            }
        } else {
609 610
            if (vshader_input_is_color((IWineD3DVertexShader*) This, reg))
               *is_color = TRUE;
611
            sprintf(tmpStr, "attrib%u", reg);
612 613
        } 
        break;
614
    case WINED3DSPR_CONST:
615 616 617
    {
        const char* prefix = pshader? "PC":"VC";

618
        /* Relative addressing */
619
        if (param & WINED3DSHADER_ADDRMODE_RELATIVE) {
620 621 622

           /* Relative addressing on shaders 2.0+ have a relative address token, 
            * prior to that, it was hard-coded as "A0.x" because there's only 1 register */
623
           if (WINED3DSHADER_VERSION_MAJOR(This->baseShader.hex_version) >= 2)  {
624 625 626
               glsl_src_param_t rel_param;
               shader_glsl_add_src_param(arg, addr_token, 0, WINED3DSP_WRITEMASK_0, &rel_param);
               sprintf(tmpStr, "%s[%s + %u]", prefix, rel_param.param_str, reg);
627
           } else
628
               sprintf(tmpStr, "%s[A0.x + %u]", prefix, reg);
629 630

        } else
631
             sprintf(tmpStr, "%s[%u]", prefix, reg);
632

633 634
        break;
    }
635
    case WINED3DSPR_CONSTINT:
636
        if (pshader)
637
            sprintf(tmpStr, "PI[%u]", reg);
638
        else
639
            sprintf(tmpStr, "VI[%u]", reg);
640
        break;
641
    case WINED3DSPR_CONSTBOOL:
642
        if (pshader)
643
            sprintf(tmpStr, "PB[%u]", reg);
644
        else
645
            sprintf(tmpStr, "VB[%u]", reg);
646
        break;
647
    case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
648
        if (pshader) {
649
            sprintf(tmpStr, "T%u", reg);
650
        } else {
651
            sprintf(tmpStr, "A%u", reg);
652 653
        }
    break;
654
    case WINED3DSPR_LOOP:
655 656
        sprintf(tmpStr, "aL");
    break;
657
    case WINED3DSPR_SAMPLER:
658
        if (pshader)
659
            sprintf(tmpStr, "Psampler%u", reg);
660
        else
661
            sprintf(tmpStr, "Vsampler%u", reg);
662
    break;
663
    case WINED3DSPR_COLOROUT:
664 665 666
        if (reg >= GL_LIMITS(buffers)) {
            WARN("Write to render target %u, only %d supported\n", reg, 4);
        }
667
        if (GL_SUPPORT(ARB_DRAW_BUFFERS)) {
668
            sprintf(tmpStr, "gl_FragData[%u]", reg);
669
        } else { /* On older cards with GLSL support like the GeforceFX there's only one buffer. */
670
            sprintf(tmpStr, "gl_FragColor");
671 672
        }
    break;
673
    case WINED3DSPR_RASTOUT:
674 675
        sprintf(tmpStr, "%s", hwrastout_reg_names[reg]);
    break;
676
    case WINED3DSPR_DEPTHOUT:
677 678
        sprintf(tmpStr, "gl_FragDepth");
    break;
679
    case WINED3DSPR_ATTROUT:
680 681 682 683 684 685
        if (reg == 0) {
            sprintf(tmpStr, "gl_FrontColor");
        } else {
            sprintf(tmpStr, "gl_FrontSecondaryColor");
        }
    break;
686 687
    case WINED3DSPR_TEXCRDOUT:
        /* Vertex shaders >= 3.0: WINED3DSPR_OUTPUT */
688
        if (WINED3DSHADER_VERSION_MAJOR(This->baseShader.hex_version) >= 3)
689
            sprintf(tmpStr, "OUT%u", reg);
690
        else
691
            sprintf(tmpStr, "gl_TexCoord[%u]", reg);
692 693
    break;
    default:
694
        FIXME("Unhandled register name Type(%d)\n", regtype);
695 696 697 698 699 700 701
        sprintf(tmpStr, "unrecognized_register");
    break;
    }

    strcat(regstr, tmpStr);
}

702
/* Get the GLSL write mask for the destination register */
703
static DWORD shader_glsl_get_write_mask(const DWORD param, char *write_mask) {
704
    char *ptr = write_mask;
705
    DWORD mask = param & WINED3DSP_WRITEMASK_ALL;
706

707
    if (shader_is_scalar(param)) {
708 709
        mask = WINED3DSP_WRITEMASK_0;
    } else {
710 711 712 713 714
        *ptr++ = '.';
        if (param & WINED3DSP_WRITEMASK_0) *ptr++ = 'x';
        if (param & WINED3DSP_WRITEMASK_1) *ptr++ = 'y';
        if (param & WINED3DSP_WRITEMASK_2) *ptr++ = 'z';
        if (param & WINED3DSP_WRITEMASK_3) *ptr++ = 'w';
715 716
    }

717
    *ptr = '\0';
718 719

    return mask;
720 721
}

722 723 724 725 726 727 728 729 730 731 732
/* Returns the number of components (0 to 4) enabled in write_mask. */
static size_t shader_glsl_get_write_mask_size(DWORD write_mask) {
    const DWORD component_bits[4] = {
        WINED3DSP_WRITEMASK_0, WINED3DSP_WRITEMASK_1,
        WINED3DSP_WRITEMASK_2, WINED3DSP_WRITEMASK_3
    };
    size_t count = 0;
    unsigned int c;

    for (c = 0; c < 4; ++c) {
        if (write_mask & component_bits[c])
            count++;
    }

    return count;
}

733
/* Generate the GLSL swizzle for a source parameter.
 *
 * param:       source parameter token carrying the D3D swizzle
 * fixup:       TRUE to swap the x and z components. For registers of type
 *              WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra" but
 *              addressed as "rgba".
 * mask:        write mask of the destination; only the swizzle components
 *              for enabled destination components are emitted
 * swizzle_str: receives the swizzle, or an empty string for scalar registers
 */
static void shader_glsl_get_swizzle(const DWORD param, BOOL fixup, DWORD mask, char *swizzle_str) {
    const DWORD component_bits[4] = {
        WINED3DSP_WRITEMASK_0, WINED3DSP_WRITEMASK_1,
        WINED3DSP_WRITEMASK_2, WINED3DSP_WRITEMASK_3
    };
    const char *names = fixup ? "zyxw" : "xyzw";
    DWORD swizzle = (param & WINED3DSP_SWIZZLE_MASK) >> WINED3DSP_SWIZZLE_SHIFT;
    char *out = swizzle_str;
    unsigned int c;

    if (!shader_is_scalar(param)) {
        *out++ = '.';
        /* swizzle bits fields: wwzzyyxx, two bits per destination component */
        for (c = 0; c < 4; ++c) {
            if (mask & component_bits[c])
                *out++ = names[(swizzle >> (2 * c)) & 0x03];
        }
    }

    *out = '\0';
}

753 754
/* From a given parameter token, generate the corresponding GLSL string.
 * Also, return the actual register name and swizzle in case the
755
 * caller needs this information as well. */
756
static void shader_glsl_add_src_param(SHADER_OPCODE_ARG* arg, const DWORD param,
757
        const DWORD addr_token, DWORD mask, glsl_src_param_t *src_param) {
758
    BOOL is_color = FALSE;
759 760 761 762 763
    char swizzle_str[6];

    src_param->reg_name[0] = '\0';
    src_param->param_str[0] = '\0';
    swizzle_str[0] = '\0';
764

765
    shader_glsl_get_register_name(param, addr_token, src_param->reg_name, &is_color, arg);
766

767 768
    shader_glsl_get_swizzle(param, is_color, mask, swizzle_str);
    shader_glsl_gen_modifier(param, src_param->reg_name, swizzle_str, src_param->param_str);
769 770 771 772 773
}

/* From a given parameter token, generate the corresponding GLSL string.
 * Also, return the actual register name and swizzle in case the
 * caller needs this information as well. */
774
static DWORD shader_glsl_add_dst_param(SHADER_OPCODE_ARG* arg, const DWORD param,
775
        const DWORD addr_token, glsl_dst_param_t *dst_param) {
776 777
    BOOL is_color = FALSE;

778 779
    dst_param->mask_str[0] = '\0';
    dst_param->reg_name[0] = '\0';
780

781 782
    shader_glsl_get_register_name(param, addr_token, dst_param->reg_name, &is_color, arg);
    return shader_glsl_get_write_mask(param, dst_param->mask_str);
783 784
}

785
/* Append the destination part of the instruction to the buffer, return the effective write mask */
786
static DWORD shader_glsl_append_dst_ext(SHADER_BUFFER *buffer, SHADER_OPCODE_ARG *arg, const DWORD param) {
787
    glsl_dst_param_t dst_param;
788 789 790
    DWORD mask;
    int shift;

791 792 793 794 795 796
    mask = shader_glsl_add_dst_param(arg, param, arg->dst_addr, &dst_param);

    if(mask) {
        shift = (param & WINED3DSP_DSTSHIFT_MASK) >> WINED3DSP_DSTSHIFT_SHIFT;
        shader_addline(buffer, "%s%s = %s(", dst_param.reg_name, dst_param.mask_str, shift_glsl_tab[shift]);
    }
797 798 799 800

    return mask;
}

801 802 803 804 805
/* Convenience wrapper around shader_glsl_append_dst_ext() that uses the
 * instruction's own destination token. Returns the effective write mask. */
static DWORD shader_glsl_append_dst(SHADER_BUFFER *buffer, SHADER_OPCODE_ARG *arg) {
    const DWORD dst_token = arg->dst;

    return shader_glsl_append_dst_ext(buffer, arg, dst_token);
}

806 807
/** Process GLSL instruction modifiers.
 * Looks at the destination modifier bits of arg->dst and, for opcodes that
 * have a destination token, emits the code needed to apply them to the
 * destination register. */
void shader_glsl_add_instruction_modifiers(SHADER_OPCODE_ARG* arg) {
    const DWORD modifiers = arg->dst & WINED3DSP_DSTMOD_MASK;
    glsl_dst_param_t dst;

    /* Nothing to do without a destination or without any modifier set. */
    if (!arg->opcode->dst_token || !modifiers) return;

    shader_glsl_add_dst_param(arg, arg->dst, 0, &dst);

    if (modifiers & WINED3DSPDM_SATURATE) {
        /* _SAT means to clamp the value of the register to between 0 and 1 */
        shader_addline(arg->buffer, "%s%s = clamp(%s%s, 0.0, 1.0);\n",
                dst.reg_name, dst.mask_str, dst.reg_name, dst.mask_str);
    }

    if (modifiers & WINED3DSPDM_MSAMPCENTROID) {
        FIXME("_centroid modifier not handled\n");
    }

    /* WINED3DSPDM_PARTIALPRECISION: MSDN says this modifier can be safely
     * ignored, so that's what we do. */
}

830 831 832 833 834 835 836 837 838 839 840 841
/* Return the GLSL comparison operator encoded in the instruction control
 * bits of an opcode token. Unknown values are reported with a FIXME and
 * yield the placeholder string "(??)". */
static inline const char* shader_get_comp_op(
    const DWORD opcode) {

    const DWORD comparison = (opcode & INST_CONTROLS_MASK) >> INST_CONTROLS_SHIFT;
    const char *glsl_op;

    switch (comparison) {
        case COMPARISON_GT: glsl_op = ">";  break;
        case COMPARISON_EQ: glsl_op = "=="; break;
        case COMPARISON_GE: glsl_op = ">="; break;
        case COMPARISON_LT: glsl_op = "<";  break;
        case COMPARISON_NE: glsl_op = "!="; break;
        case COMPARISON_LE: glsl_op = "<="; break;
        default:
            FIXME("Unrecognized comparison value: %u\n", comparison);
            /* The question marks are escaped to avoid forming a trigraph. */
            glsl_op = "(\?\?)";
            break;
    }

    return glsl_op;
}

847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872
/* Select the GLSL texture lookup function for a sampler type.
 *
 * sampler_type    one of the WINED3DSTT_* sampler types
 * projected       TRUE to select the "Proj" variant where one exists
 * sample_function receives the function name and the mask of coordinate
 *                 components it takes (x for 1D, xy for 2D, xyz for cube
 *                 and volume textures)
 *
 * For an unrecognized sampler type a FIXME is printed and the result is an
 * empty name with an empty coordinate mask. */
static void shader_glsl_get_sample_function(DWORD sampler_type, BOOL projected, glsl_sample_function_t *sample_function) {
    /* Note that there's no such thing as a projected cube texture. */
    switch(sampler_type) {
        case WINED3DSTT_1D:
            sample_function->name = projected ? "texture1DProj" : "texture1D";
            sample_function->coord_mask = WINED3DSP_WRITEMASK_0;
            break;
        case WINED3DSTT_2D:
            sample_function->name = projected ? "texture2DProj" : "texture2D";
            sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1;
            break;
        case WINED3DSTT_CUBE:
            sample_function->name = "textureCube";
            sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
            break;
        case WINED3DSTT_VOLUME:
            sample_function->name = projected ? "texture3DProj" : "texture3D";
            sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
            break;
        default:
            sample_function->name = "";
            /* Give callers a defined value instead of whatever happened to
             * be in the caller's (typically stack allocated) structure. */
            sample_function->coord_mask = 0;
            FIXME("Unrecognized sampler type: %#x;\n", sampler_type);
            break;
    }
}

873

874 875 876 877 878 879
/*****************************************************************************
 * 
 * Begin processing individual instruction opcodes
 * 
 ****************************************************************************/

880
/* Generate GLSL arithmetic functions (dst = src1 + src2) */
881 882 883
void shader_glsl_arith(SHADER_OPCODE_ARG* arg) {
    CONST SHADER_OPCODE* curOpcode = arg->opcode;
    SHADER_BUFFER* buffer = arg->buffer;
884 885
    glsl_src_param_t src0_param;
    glsl_src_param_t src1_param;
886 887
    DWORD write_mask;
    char op;
888 889 890

    /* Determine the GLSL operator to use based on the opcode */
    switch (curOpcode->opcode) {
891 892 893
        case WINED3DSIO_MUL: op = '*'; break;
        case WINED3DSIO_ADD: op = '+'; break;
        case WINED3DSIO_SUB: op = '-'; break;
894
        default:
895
            op = ' ';
896 897 898
            FIXME("Opcode %s not yet handled in GLSL\n", curOpcode->name);
            break;
    }
899 900

    write_mask = shader_glsl_append_dst(buffer, arg);
901 902 903
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], write_mask, &src0_param);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], write_mask, &src1_param);
    shader_addline(buffer, "%s %c %s);\n", src0_param.param_str, op, src1_param.param_str);
904 905
}

906
/* Process the WINED3DSIO_MOV opcode using GLSL (dst = src) */
907
void shader_glsl_mov(SHADER_OPCODE_ARG* arg) {
908
    IWineD3DBaseShaderImpl* shader = (IWineD3DBaseShaderImpl*) arg->shader;
909
    SHADER_BUFFER* buffer = arg->buffer;
910
    glsl_src_param_t src0_param;
911 912 913
    DWORD write_mask;

    write_mask = shader_glsl_append_dst(buffer, arg);
914
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], write_mask, &src0_param);
915

916
    /* In vs_1_1 WINED3DSIO_MOV can write to the address register. In later
917 918 919 920 921 922 923 924
     * shader versions WINED3DSIO_MOVA is used for this. */
    if ((WINED3DSHADER_VERSION_MAJOR(shader->baseShader.hex_version) == 1 &&
            !shader_is_pshader_version(shader->baseShader.hex_version) &&
            shader_get_regtype(arg->dst) == WINED3DSPR_ADDR) ||
            arg->opcode->opcode == WINED3DSIO_MOVA) {
        /* We need to *round* to the nearest int here. */
        size_t mask_size = shader_glsl_get_write_mask_size(write_mask);
        if (mask_size > 1) {
925
            shader_addline(buffer, "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s)));\n", mask_size, src0_param.param_str, mask_size, src0_param.param_str);
926
        } else {
927
            shader_addline(buffer, "int(floor(abs(%s) + 0.5) * sign(%s)));\n", src0_param.param_str, src0_param.param_str);
928 929
        }
    } else {
930
        shader_addline(buffer, "%s);\n", src0_param.param_str);
931
    }
932 933 934 935 936 937
}

/* Process the dot product operators DP3 and DP4 in GLSL (dst = dot(src0, src1)).
 * The sources always use three (DP3) or four (DP4) components regardless of
 * the destination write mask; the scalar result is replicated to a vector
 * when more than one destination component is written. */
void shader_glsl_dot(SHADER_OPCODE_ARG* arg) {
    CONST SHADER_OPCODE* curOpcode = arg->opcode;
    SHADER_BUFFER* buffer = arg->buffer;
    glsl_src_param_t src0_param;
    glsl_src_param_t src1_param;
    DWORD dst_write_mask, src_write_mask;
    size_t dst_size = 0;

    dst_write_mask = shader_glsl_append_dst(buffer, arg);
    dst_size = shader_glsl_get_write_mask_size(dst_write_mask);

    /* dp3 works on vec3, dp4 on vec4 */
    if (curOpcode->opcode == WINED3DSIO_DP4) {
        src_write_mask = WINED3DSP_WRITEMASK_ALL;
    } else {
        src_write_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
    }

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_write_mask, &src0_param);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], src_write_mask, &src1_param);

    if (dst_size > 1) {
        /* dst_size is a size_t, "%d" consumes an int: convert explicitly. */
        shader_addline(buffer, "vec%d(dot(%s, %s)));\n", (int)dst_size, src0_param.param_str, src1_param.param_str);
    } else {
        shader_addline(buffer, "dot(%s, %s));\n", src0_param.param_str, src1_param.param_str);
    }
}

963 964 965 966
/* Generate GLSL for the cross product (dst = cross(src0, src1)).
 * Note that this instruction has some restrictions. The destination write mask
 * can't contain the w component, and the source swizzles have to be .xyzw */
void shader_glsl_cross(SHADER_OPCODE_ARG *arg) {
    DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
    glsl_src_param_t src0_param;
    glsl_src_param_t src1_param;
    char dst_mask[6];

    shader_glsl_get_write_mask(arg->dst, dst_mask);
    shader_glsl_append_dst(arg->buffer, arg);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], src_mask, &src1_param);
    /* dst_mask is appended directly, as the other users of
     * shader_glsl_get_write_mask() in this file do; an extra "." in the
     * format string would produce "cross(a, b)..xyz". */
    shader_addline(arg->buffer, "cross(%s, %s)%s);\n", src0_param.param_str, src1_param.param_str, dst_mask);
}

978 979 980 981
/* Map the opcode 1-to-1 to a GLSL built-in function
 * (arg->dst = function(src0, src1, ...)). Every source is swizzled
 * according to the destination write mask. Opcodes without a mapping are
 * reported with a FIXME and produce an empty function name. */
void shader_glsl_map2gl(SHADER_OPCODE_ARG* arg) {
    CONST SHADER_OPCODE *opcode = arg->opcode;
    SHADER_BUFFER *buffer = arg->buffer;
    glsl_src_param_t src;
    const char *glsl_func = "";
    char arg_list[256];
    DWORD dst_mask;
    unsigned src_idx;

    /* Determine the GLSL function to use based on the opcode */
    /* TODO: Possibly make this a table for faster lookups */
    switch (opcode->opcode) {
        case WINED3DSIO_MIN:  glsl_func = "min"; break;
        case WINED3DSIO_MAX:  glsl_func = "max"; break;
        case WINED3DSIO_RSQ:  glsl_func = "inversesqrt"; break;
        case WINED3DSIO_ABS:  glsl_func = "abs"; break;
        case WINED3DSIO_FRC:  glsl_func = "fract"; break;
        case WINED3DSIO_POW:  glsl_func = "pow"; break;
        case WINED3DSIO_NRM:  glsl_func = "normalize"; break;
        case WINED3DSIO_LOGP: /* fall through, same as LOG */
        case WINED3DSIO_LOG:  glsl_func = "log2"; break;
        case WINED3DSIO_EXP:  glsl_func = "exp2"; break;
        case WINED3DSIO_SGN:  glsl_func = "sign"; break;
        default:
            FIXME("Opcode %s not yet handled in GLSL\n", opcode->name);
            break;
    }

    dst_mask = shader_glsl_append_dst(buffer, arg);

    /* Build the comma separated argument list. The loop bound leaves room
     * for one parameter that is not a source, so sources 1 .. num_params - 2
     * follow the first one. */
    arg_list[0] = '\0';
    if (opcode->num_params > 0) {
        shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], dst_mask, &src);
        strcat(arg_list, src.param_str);

        for (src_idx = 1; src_idx + 1 < opcode->num_params; ++src_idx) {
            strcat(arg_list, ", ");
            shader_glsl_add_src_param(arg, arg->src[src_idx], arg->src_addr[src_idx], dst_mask, &src);
            strcat(arg_list, src.param_str);
        }
    }

    shader_addline(buffer, "%s(%s));\n", glsl_func, arg_list);
}

1023
/** Process the WINED3DSIO_EXPP instruction in GLSL:
 * For shader model 1.x, do the following (and honor the writemask, so use a temporary variable):
 *   dst.x = 2^(floor(src))
 *   dst.y = src - floor(src)
 *   dst.z = 2^src   (partial precision is allowed, but optional)
 *   dst.w = 1.0;
 * For 2.0 shaders, just do this (honoring writemask and swizzle):
 *   dst = 2^src;    (partial precision is allowed, but optional)
 */
void shader_glsl_expp(SHADER_OPCODE_ARG* arg) {
    IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)arg->shader;
    glsl_src_param_t src_param;

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src_param);

    /* Only look at the major version. hex_version also encodes the shader
     * type, so comparing it against a pixel shader version constant does
     * not order vertex shader versions correctly and would send every
     * vertex shader down the 1.x path. */
    if (WINED3DSHADER_VERSION_MAJOR(shader->baseShader.hex_version) < 2) {
        char dst_mask[6];

        /* Compute all four components in tmp0, then copy the requested
         * ones so that the write mask is honored. */
        shader_addline(arg->buffer, "tmp0.x = exp2(floor(%s));\n", src_param.param_str);
        shader_addline(arg->buffer, "tmp0.y = %s - floor(%s);\n", src_param.param_str, src_param.param_str);
        shader_addline(arg->buffer, "tmp0.z = exp2(%s);\n", src_param.param_str);
        shader_addline(arg->buffer, "tmp0.w = 1.0;\n");

        shader_glsl_append_dst(arg->buffer, arg);
        shader_glsl_get_write_mask(arg->dst, dst_mask);
        shader_addline(arg->buffer, "tmp0%s);\n", dst_mask);
    } else {
        DWORD write_mask;
        size_t mask_size;

        write_mask = shader_glsl_append_dst(arg->buffer, arg);
        mask_size = shader_glsl_get_write_mask_size(write_mask);

        if (mask_size > 1) {
            /* mask_size is a size_t, "%d" consumes an int: convert explicitly. */
            shader_addline(arg->buffer, "vec%d(exp2(%s)));\n", (int)mask_size, src_param.param_str);
        } else {
            shader_addline(arg->buffer, "exp2(%s));\n", src_param.param_str);
        }
    }
}

1064 1065
/** Process the RCP (reciprocal or inverse) opcode in GLSL (dst = 1 / src).
 * Only a single source component is used; the result is replicated to a
 * vector when more than one destination component is written. */
void shader_glsl_rcp(SHADER_OPCODE_ARG* arg) {
    glsl_src_param_t src_param;
    DWORD write_mask;
    size_t mask_size;

    write_mask = shader_glsl_append_dst(arg->buffer, arg);
    mask_size = shader_glsl_get_write_mask_size(write_mask);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src_param);

    if (mask_size > 1) {
        /* mask_size is a size_t, "%d" consumes an int: convert explicitly. */
        shader_addline(arg->buffer, "vec%d(1.0 / %s));\n", (int)mask_size, src_param.param_str);
    } else {
        shader_addline(arg->buffer, "1.0 / %s);\n", src_param.param_str);
    }
}

/** Process signed comparison opcodes in GLSL.
 * SLT and SGE write 1.0 where the comparison holds and 0.0 elsewhere. For
 * vector destinations the component-wise GLSL functions are used and the
 * result is converted back to a float vector; scalar destinations use the
 * plain operator with a conditional expression. */
void shader_glsl_compare(SHADER_OPCODE_ARG* arg) {
    glsl_src_param_t src0_param;
    glsl_src_param_t src1_param;
    const char *vector_func;
    const char *scalar_op;
    DWORD write_mask;
    size_t mask_size;

    write_mask = shader_glsl_append_dst(arg->buffer, arg);
    mask_size = shader_glsl_get_write_mask_size(write_mask);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], write_mask, &src0_param);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], write_mask, &src1_param);

    switch(arg->opcode->opcode) {
        case WINED3DSIO_SLT:
            vector_func = "lessThan";
            scalar_op = "<";
            break;
        case WINED3DSIO_SGE:
            vector_func = "greaterThanEqual";
            scalar_op = ">=";
            break;
        default:
            vector_func = "";
            scalar_op = "";
            FIXME("Can't handle opcode %s\n", arg->opcode->name);
            break;
    }

    if (mask_size > 1) {
        /* mask_size is a size_t, "%d" consumes an int: convert explicitly. */
        shader_addline(arg->buffer, "vec%d(%s(%s, %s)));\n", (int)mask_size, vector_func,
                src0_param.param_str, src1_param.param_str);
    } else {
        shader_addline(arg->buffer, "(%s %s %s) ? 1.0 : 0.0);\n",
                src0_param.param_str, scalar_op, src1_param.param_str);
    }
}

1120
/** Process CMP instruction in GLSL (dst = src0 >= 0.0 ? src1 : src2), per channel */
1121
void shader_glsl_cmp(SHADER_OPCODE_ARG* arg) {
1122 1123 1124
    glsl_src_param_t src0_param;
    glsl_src_param_t src1_param;
    glsl_src_param_t src2_param;
1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139
    DWORD write_mask, cmp_channel = 0;
    unsigned int i, j;

    /* Cycle through all source0 channels */
    for (i=0; i<4; i++) {
        write_mask = 0;
        /* Find the destination channels which use the current source0 channel */
        for (j=0; j<4; j++) {
            if ( ((arg->src[0] >> (WINED3DSP_SWIZZLE_SHIFT + 2*j)) & 0x3) == i ) {
                write_mask |= WINED3DSP_WRITEMASK_0 << j;
                cmp_channel = WINED3DSP_WRITEMASK_0 << j;
            }
        }
        write_mask = shader_glsl_append_dst_ext(arg->buffer, arg, arg->dst & (~WINED3DSP_SWIZZLE_MASK | write_mask));
        if (!write_mask) continue;
1140

1141 1142 1143
        shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], cmp_channel, &src0_param);
        shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], write_mask, &src1_param);
        shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], write_mask, &src2_param);
1144

1145 1146
        shader_addline(arg->buffer, "%s >= 0.0 ? %s : %s);\n",
                src0_param.param_str, src1_param.param_str, src2_param.param_str);
1147
    }
1148 1149
}

1150
/** Process the CND opcode in GLSL (dst = (src0 > 0.5) ? src1 : src2) */
1151 1152
/* For ps 1.1-1.3, only a single component of src0 is used. For ps 1.4
 * the compare is done per component of src0. */
1153
void shader_glsl_cnd(SHADER_OPCODE_ARG* arg) {
1154
    IWineD3DBaseShaderImpl* shader = (IWineD3DBaseShaderImpl*) arg->shader;
1155 1156 1157
    glsl_src_param_t src0_param;
    glsl_src_param_t src1_param;
    glsl_src_param_t src2_param;
1158 1159
    DWORD write_mask, cmp_channel = 0;
    unsigned int i, j;
1160 1161

    if (shader->baseShader.hex_version < WINED3DPS_VERSION(1, 4)) {
1162
        write_mask = shader_glsl_append_dst(arg->buffer, arg);
1163
        shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
1164 1165
        shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], write_mask, &src1_param);
        shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], write_mask, &src2_param);
1166
        shader_addline(arg->buffer, "%s > 0.5 ? %s : %s);\n",
1167 1168
                src0_param.param_str, src1_param.param_str, src2_param.param_str);
        return;
1169
    }
1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181
    /* Cycle through all source0 channels */
    for (i=0; i<4; i++) {
        write_mask = 0;
        /* Find the destination channels which use the current source0 channel */
        for (j=0; j<4; j++) {
            if ( ((arg->src[0] >> (WINED3DSP_SWIZZLE_SHIFT + 2*j)) & 0x3) == i ) {
                write_mask |= WINED3DSP_WRITEMASK_0 << j;
                cmp_channel = WINED3DSP_WRITEMASK_0 << j;
            }
        }
        write_mask = shader_glsl_append_dst_ext(arg->buffer, arg, arg->dst & (~WINED3DSP_SWIZZLE_MASK | write_mask));
        if (!write_mask) continue;
1182

1183 1184 1185
        shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], cmp_channel, &src0_param);
        shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], write_mask, &src1_param);
        shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], write_mask, &src2_param);
1186

1187
        shader_addline(arg->buffer, "%s > 0.5 ? %s : %s);\n",
1188
                src0_param.param_str, src1_param.param_str, src2_param.param_str);
1189
    }
1190 1191
}

1192
/** GLSL code generation for WINED3DSIO_MAD: Multiply the first 2 opcodes, then add the last */
1193
void shader_glsl_mad(SHADER_OPCODE_ARG* arg) {
1194 1195 1196
    glsl_src_param_t src0_param;
    glsl_src_param_t src1_param;
    glsl_src_param_t src2_param;
1197
    DWORD write_mask;
1198

1199
    write_mask = shader_glsl_append_dst(arg->buffer, arg);
1200 1201 1202 1203 1204
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], write_mask, &src0_param);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], write_mask, &src1_param);
    shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], write_mask, &src2_param);
    shader_addline(arg->buffer, "(%s * %s) + %s);\n",
            src0_param.param_str, src1_param.param_str, src2_param.param_str);
1205 1206
}

1207
/** Handles transforming all WINED3DSIO_M?x? opcodes for
    Vertex shaders to GLSL codes.
    A matrix multiply is expanded into one DP3 or DP4 per result component:
    component i of the destination is the dot product of src0 with the
    register i places after src1. Unknown opcodes emit nothing. */
void shader_glsl_mnxn(SHADER_OPCODE_ARG* arg) {
    SHADER_OPCODE_ARG dot_arg;
    DWORD dot_opcode = 0;
    int rows = 0;
    int row;

    /* M<a>x<b>: <a> picks DP3 or DP4, <b> is the number of results */
    switch(arg->opcode->opcode) {
        case WINED3DSIO_M4x4: rows = 4; dot_opcode = WINED3DSIO_DP4; break;
        case WINED3DSIO_M4x3: rows = 3; dot_opcode = WINED3DSIO_DP4; break;
        case WINED3DSIO_M3x4: rows = 4; dot_opcode = WINED3DSIO_DP3; break;
        case WINED3DSIO_M3x3: rows = 3; dot_opcode = WINED3DSIO_DP3; break;
        case WINED3DSIO_M3x2: rows = 2; dot_opcode = WINED3DSIO_DP3; break;
        default:
            break;
    }

    /* Set up the parts of the temporary argument shared by all rows */
    memset(&dot_arg, 0, sizeof(SHADER_OPCODE_ARG));
    dot_arg.shader      = arg->shader;
    dot_arg.buffer      = arg->buffer;
    dot_arg.reg_maps    = arg->reg_maps;
    dot_arg.src[0]      = arg->src[0];
    dot_arg.src_addr[0] = arg->src_addr[0];
    dot_arg.src_addr[1] = arg->src_addr[1];
    if (rows) {
        dot_arg.opcode = shader_get_opcode(arg->shader, dot_opcode);
    }

    for (row = 0; row < rows; ++row) {
        /* Write a single destination component, read the next register */
        dot_arg.dst    = (arg->dst & ~WINED3DSP_WRITEMASK_ALL) | (WINED3DSP_WRITEMASK_0 << row);
        dot_arg.src[1] = arg->src[1] + row;
        shader_glsl_dot(&dot_arg);
    }
}

/**
    The LRP instruction performs a component-wise linear interpolation
    between the second and third operands using the first operand as the
    blend factor.  Equation:  (dst = src2 + src0 * (src1 - src2))
    This is equivalent to mix(src2, src1, src0), which is what gets emitted.
*/
void shader_glsl_lrp(SHADER_OPCODE_ARG* arg) {
    SHADER_BUFFER *buffer = arg->buffer;
    glsl_src_param_t blend;
    glsl_src_param_t to;
    glsl_src_param_t from;
    DWORD dst_mask = shader_glsl_append_dst(buffer, arg);

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], dst_mask, &blend);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], dst_mask, &to);
    shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], dst_mask, &from);

    /* Note the reversed operand order compared to the D3D instruction */
    shader_addline(buffer, "mix(%s, %s, %s));\n",
            from.param_str, to.param_str, blend.param_str);
}

1278
/** Process the WINED3DSIO_LIT instruction in GLSL:
 * dst.x = dst.w = 1.0
 * dst.y = (src0.x > 0) ? src0.x : 0
 * dst.z = (src0.x > 0) ? ((src0.y > 0) ? pow(src0.y, src0.w) : 0) : 0
 *                                        where src0.w is clamped to +- 128
 * The full vec4 is built and then reduced to the destination write mask.
 */
void shader_glsl_lit(SHADER_OPCODE_ARG* arg) {
    SHADER_BUFFER *buffer = arg->buffer;
    glsl_src_param_t x_param;
    glsl_src_param_t y_param;
    glsl_src_param_t w_param;
    char dst_mask[6];

    shader_glsl_append_dst(buffer, arg);
    shader_glsl_get_write_mask(arg->dst, dst_mask);

    /* Fetch the x, y and w components of src0 as separate scalars */
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &x_param);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_1, &y_param);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_3, &w_param);

    shader_addline(buffer, "vec4(1.0, (%s > 0.0 ? %s : 0.0), (%s > 0.0 ? ((%s > 0.0) ? pow(%s, clamp(%s, -128.0, 128.0)) : 0.0) : 0.0), 1.0)%s);\n",
            x_param.param_str, x_param.param_str, x_param.param_str,
            y_param.param_str, y_param.param_str, w_param.param_str, dst_mask);
}

1301
/** Process the WINED3DSIO_DST instruction in GLSL:
 * dst.x = 1.0
 * dst.y = src0.y * src1.y
 * dst.z = src0.z
 * dst.w = src1.w
 * The full vec4 is built and then reduced to the destination write mask.
 */
void shader_glsl_dst(SHADER_OPCODE_ARG* arg) {
    SHADER_BUFFER *buffer = arg->buffer;
    glsl_src_param_t a_y;
    glsl_src_param_t a_z;
    glsl_src_param_t b_y;
    glsl_src_param_t b_w;
    char dst_mask[6];

    shader_glsl_append_dst(buffer, arg);
    shader_glsl_get_write_mask(arg->dst, dst_mask);

    /* Fetch the individual components used by the result */
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_1, &a_y);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_2, &a_z);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_1, &b_y);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_3, &b_w);

    /* Here the parenthesis opened by shader_glsl_append_dst() is closed
     * before the mask is applied. */
    shader_addline(buffer, "vec4(1.0, %s * %s, %s, %s))%s;\n",
            a_y.param_str, b_y.param_str, a_z.param_str, b_w.param_str, dst_mask);
}

1326
/** Process the WINED3DSIO_SINCOS instruction in GLSL:
 * VS 2.0 requires that specific cosine and sine constants be passed to this instruction so the hardware
 * can handle it.  But, these functions are built-in for GLSL, so we can just ignore the last 2 params.
 *
 * dst.x = cos(src0.?)
 * dst.y = sin(src0.?)
 * dst.z = dst.z
 * dst.w = dst.w
 *
 * Only the write masks .x, .y and .xy are handled, anything else is
 * reported with an ERR.
 */
void shader_glsl_sincos(SHADER_OPCODE_ARG* arg) {
    SHADER_BUFFER *buffer = arg->buffer;
    glsl_src_param_t angle;
    DWORD dst_mask = shader_glsl_append_dst(buffer, arg);

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &angle);

    if (dst_mask == WINED3DSP_WRITEMASK_0) {
        shader_addline(buffer, "cos(%s));\n", angle.param_str);
    } else if (dst_mask == WINED3DSP_WRITEMASK_1) {
        shader_addline(buffer, "sin(%s));\n", angle.param_str);
    } else if (dst_mask == (WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1)) {
        shader_addline(buffer, "vec2(cos(%s), sin(%s)));\n", angle.param_str, angle.param_str);
    } else {
        ERR("Write mask should be .x, .y or .xy\n");
    }
}

1361
/** Process the WINED3DSIO_LOOP instruction in GLSL:
1362 1363
 * Start a for() loop where src1.y is the initial value of aL,
 *  increment aL by src1.z for a total of src1.x iterations.
1364
 *  Need to use a temporary variable for this operation.
1365
 */
1366
/* FIXME: I don't think nested loops will work correctly this way. */
1367
void shader_glsl_loop(SHADER_OPCODE_ARG* arg) {
1368 1369 1370
    glsl_src_param_t src1_param;

    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_ALL, &src1_param);
1371 1372
  
    shader_addline(arg->buffer, "for (tmpInt = 0, aL = %s.y; tmpInt < %s.x; tmpInt++, aL += %s.z) {\n",
1373
            src1_param.reg_name, src1_param.reg_name, src1_param.reg_name);
1374 1375 1376 1377 1378 1379 1380
}

/* Emit the closing brace of a GLSL block. */
void shader_glsl_end(SHADER_OPCODE_ARG* arg) {
    SHADER_BUFFER *buffer = arg->buffer;

    shader_addline(buffer, "}\n");
}

/* Start a for() loop that runs src0 times, counting in tmpInt. */
void shader_glsl_rep(SHADER_OPCODE_ARG* arg) {
    SHADER_BUFFER *buffer = arg->buffer;
    glsl_src_param_t count;

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &count);

    shader_addline(buffer, "for (tmpInt = 0; tmpInt < %s; tmpInt++) {\n", count.param_str);
}

1387
/* Open an if block whose condition is the first component of src0. */
void shader_glsl_if(SHADER_OPCODE_ARG* arg) {
    SHADER_BUFFER *buffer = arg->buffer;
    glsl_src_param_t condition;

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &condition);

    shader_addline(buffer, "if (%s) {\n", condition.param_str);
}

/* Open an if block comparing the first components of src0 and src1 with
 * the comparison encoded in the opcode token. */
void shader_glsl_ifc(SHADER_OPCODE_ARG* arg) {
    glsl_src_param_t lhs;
    glsl_src_param_t rhs;
    const char *comparison;

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &lhs);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0, &rhs);
    comparison = shader_get_comp_op(arg->opcode_token);

    shader_addline(arg->buffer, "if (%s %s %s) {\n",
            lhs.param_str, comparison, rhs.param_str);
}

/* Close the current block and open the else branch. */
void shader_glsl_else(SHADER_OPCODE_ARG* arg) {
    SHADER_BUFFER *buffer = arg->buffer;

    shader_addline(buffer, "} else {\n");
}

1409 1410 1411 1412
/* Emit an unconditional GLSL break statement. */
void shader_glsl_break(SHADER_OPCODE_ARG* arg) {
    SHADER_BUFFER *buffer = arg->buffer;

    shader_addline(buffer, "break;\n");
}

1413
/* FIXME: According to MSDN the compare is done per component. */
1414
void shader_glsl_breakc(SHADER_OPCODE_ARG* arg) {
1415 1416
    glsl_src_param_t src0_param;
    glsl_src_param_t src1_param;
1417

1418 1419
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0, &src0_param);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0, &src1_param);
1420 1421

    shader_addline(arg->buffer, "if (%s %s %s) break;\n",
1422
            src0_param.param_str, shader_get_comp_op(arg->opcode_token), src1_param.param_str);
1423
}
1424

1425 1426
/* Close the function body that is currently open and start the definition
 * of the subroutine named after the register number in src0. */
void shader_glsl_label(SHADER_OPCODE_ARG* arg) {

    DWORD snum = (arg->src[0]) & WINED3DSP_REGNUM_MASK;
    shader_addline(arg->buffer, "}\n");
    /* "%lu" consumes an unsigned long; convert explicitly so the call is
     * well defined whatever the width of DWORD is. */
    shader_addline(arg->buffer, "void subroutine%lu () {\n", (unsigned long)snum);
}

/* Emit a call to the subroutine named after the register number in src0. */
void shader_glsl_call(SHADER_OPCODE_ARG* arg) {
    DWORD snum = (arg->src[0]) & WINED3DSP_REGNUM_MASK;
    /* "%lu" consumes an unsigned long; convert explicitly so the call is
     * well defined whatever the width of DWORD is. */
    shader_addline(arg->buffer, "subroutine%lu();\n", (unsigned long)snum);
}

/* Emit a call to the subroutine named after the register number in src0,
 * guarded by the first component of src1 as the condition. */
void shader_glsl_callnz(SHADER_OPCODE_ARG* arg) {
    glsl_src_param_t src1_param;

    DWORD snum = (arg->src[0]) & WINED3DSP_REGNUM_MASK;
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0, &src1_param);
    /* "%lu" consumes an unsigned long; convert explicitly so the call is
     * well defined whatever the width of DWORD is. */
    shader_addline(arg->buffer, "if (%s) subroutine%lu();\n", src1_param.param_str, (unsigned long)snum);
}

1445 1446 1447 1448 1449
/*********************************************
 * Pixel Shader Specific Code begins here
 ********************************************/
/** Process the texture sampling instruction in GLSL.
 *
 * Which sampler is used, whether the lookup is projected and which
 * coordinate components are needed all depend on the pixel shader version:
 *  - below 1.4 the destination register names the sampler and the texture
 *    transform flags of that stage decide about projection,
 *  - below 2.0 the destination register names the sampler and the source
 *    modifier (_dz / _dw) decides about projection,
 *  - otherwise the second source names the sampler and the opcode token
 *    carries the projection flag. */
void pshader_glsl_tex(SHADER_OPCODE_ARG* arg) {
    IWineD3DPixelShaderImpl* This = (IWineD3DPixelShaderImpl*) arg->shader;
    DWORD hex_version = This->baseShader.hex_version;
    glsl_sample_function_t sample_function;
    char dst_swizzle[6];
    DWORD sampler_type;
    DWORD sampler_idx;
    DWORD mask = 0;
    BOOL projected = FALSE;

    /* All versions have a destination register */
    shader_glsl_append_dst(arg->buffer, arg);

    /* 1.0-1.4: Use destination register as sampler source.
     * 2.0+: Use provided sampler source. */
    if (hex_version < WINED3DPS_VERSION(1,4)) {
        IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
        DWORD flags;

        sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
        flags = deviceImpl->stateBlock->textureState[sampler_idx][WINED3DTSS_TEXTURETRANSFORMFLAGS];

        if (flags & WINED3DTTFF_PROJECTED) {
            projected = TRUE;
            /* The coordinate count selects the component to divide by. */
            switch (flags & ~WINED3DTTFF_PROJECTED) {
                case WINED3DTTFF_COUNT1: FIXME("WINED3DTTFF_PROJECTED with WINED3DTTFF_COUNT1?\n"); break;
                case WINED3DTTFF_COUNT2: mask = WINED3DSP_WRITEMASK_1; break;
                case WINED3DTTFF_COUNT3: mask = WINED3DSP_WRITEMASK_2; break;
                case WINED3DTTFF_COUNT4:
                case WINED3DTTFF_DISABLE: mask = WINED3DSP_WRITEMASK_3; break;
            }
        }
    } else if (hex_version < WINED3DPS_VERSION(2,0)) {
        DWORD src_mod = arg->src[0] & WINED3DSP_SRCMOD_MASK;

        sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;

        if (src_mod == WINED3DSPSM_DZ) {
            projected = TRUE;
            mask = WINED3DSP_WRITEMASK_2;
        } else if (src_mod == WINED3DSPSM_DW) {
            projected = TRUE;
            mask = WINED3DSP_WRITEMASK_3;
        }
    } else {
        sampler_idx = arg->src[1] & WINED3DSP_REGNUM_MASK;

        if (arg->opcode_token & WINED3DSI_TEXLD_PROJECT) {
            /* ps 2.0 texldp instruction always divides by the fourth component. */
            projected = TRUE;
            mask = WINED3DSP_WRITEMASK_3;
        }
    }

    sampler_type = arg->reg_maps->samplers[sampler_idx] & WINED3DSP_TEXTURETYPE_MASK;
    shader_glsl_get_sample_function(sampler_type, projected, &sample_function);
    /* Add the components the sample function itself consumes. */
    mask |= sample_function.coord_mask;

    if (hex_version < WINED3DPS_VERSION(2,0)) {
        shader_glsl_get_write_mask(arg->dst, dst_swizzle);
    } else {
        shader_glsl_get_swizzle(arg->src[1], FALSE, arg->dst, dst_swizzle);
    }

    /* 1.0-1.3: Use destination register as coordinate source.
       1.4+: Use provided coordinate source register. */
    if (hex_version < WINED3DPS_VERSION(1,4)) {
        char coord_mask[6];

        shader_glsl_get_write_mask(mask, coord_mask);
        shader_addline(arg->buffer, "%s(Psampler%u, T%u%s)%s);\n",
                sample_function.name, sampler_idx, sampler_idx, coord_mask, dst_swizzle);
    } else {
        glsl_src_param_t coord_param;

        shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], mask, &coord_param);
        shader_addline(arg->buffer, "%s(Psampler%u, %s)%s);\n",
                sample_function.name, sampler_idx, coord_param.param_str, dst_swizzle);
    }
}

/** Process the texcoord / texcrd instruction in GLSL.
 *
 * For every version except ps 1.4 the texture coordinate of the destination
 * register is clamped to [0, 1] and written to the destination.
 * For ps 1.4 the coordinate named by the source register is copied, and
 * divided by its z or w component when the source carries the _dz or _dw
 * modifier.
 *
 * FIXME: Make this work for more than just 2D textures */
void pshader_glsl_texcoord(SHADER_OPCODE_ARG* arg) {
    IWineD3DPixelShaderImpl* This = (IWineD3DPixelShaderImpl*) arg->shader;
    SHADER_BUFFER* buffer = arg->buffer;
    DWORD hex_version = This->baseShader.hex_version;
    DWORD write_mask;
    char dst_mask[6];

    write_mask = shader_glsl_append_dst(arg->buffer, arg);
    shader_glsl_get_write_mask(write_mask, dst_mask);

    if (hex_version != WINED3DPS_VERSION(1,4)) {
        DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
        shader_addline(buffer, "clamp(gl_TexCoord[%u], 0.0, 1.0)%s);\n", reg, dst_mask);
    } else {
        DWORD reg = arg->src[0] & WINED3DSP_REGNUM_MASK;
        DWORD src_mod = arg->src[0] & WINED3DSP_SRCMOD_MASK;
        char dst_swizzle[6];

        shader_glsl_get_swizzle(arg->src[0], FALSE, write_mask, dst_swizzle);

        if (src_mod == WINED3DSPSM_DZ || src_mod == WINED3DSPSM_DW) {
            /* _dz divides by the third component, _dw by the fourth. */
            DWORD div_mask = (src_mod == WINED3DSPSM_DZ) ? WINED3DSP_WRITEMASK_2 : WINED3DSP_WRITEMASK_3;
            glsl_src_param_t div_param;
            /* Kept as unsigned int so it matches the %u conversion below;
             * passing a size_t to %d through varargs is undefined. */
            unsigned int mask_size = (unsigned int)shader_glsl_get_write_mask_size(write_mask);

            shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], div_mask, &div_param);

            if (mask_size > 1) {
                /* Widen the scalar divisor to the size of the destination. */
                shader_addline(buffer, "gl_TexCoord[%u]%s / vec%u(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
            } else {
                shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
            }
        } else {
            shader_addline(buffer, "gl_TexCoord[%u]%s);\n", reg, dst_swizzle);
        }
    }
}

1580
/** Process the WINED3DSIO_TEXDP3TEX instruction in GLSL:
 * Take a 3-component dot product of the TexCoord[dstreg] and src,
 * then perform a 1D texture lookup from stage dstregnum, place into dst. */
void pshader_glsl_texdp3tex(SHADER_OPCODE_ARG* arg) {
    const DWORD xyz_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
    DWORD stage = arg->dst & WINED3DSP_REGNUM_MASK;
    glsl_src_param_t src_xyz;
    char dst_mask[6];

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], xyz_mask, &src_xyz);

    shader_glsl_append_dst(arg->buffer, arg);
    shader_glsl_get_write_mask(arg->dst, dst_mask);

    /* The dot product is the 1D lookup coordinate. */
    shader_addline(arg->buffer, "texture1D(Psampler%u, dot(gl_TexCoord[%u].xyz, %s))%s);\n",
            stage, stage, src_xyz.param_str, dst_mask);
}

1597
/** Process the WINED3DSIO_TEXDP3 instruction in GLSL:
 * Take a 3-component dot product of the TexCoord[dstreg] and src.
 * The scalar result is replicated with a vecN() constructor when the
 * destination write mask selects more than one component. */
void pshader_glsl_texdp3(SHADER_OPCODE_ARG* arg) {
    glsl_src_param_t src0_param;
    DWORD dstreg = arg->dst & WINED3DSP_REGNUM_MASK;
    DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
    DWORD dst_mask;
    /* Kept as unsigned int so it matches the %u conversion below;
     * passing a size_t to %d through varargs is undefined. */
    unsigned int mask_size;

    dst_mask = shader_glsl_append_dst(arg->buffer, arg);
    mask_size = (unsigned int)shader_glsl_get_write_mask_size(dst_mask);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);

    if (mask_size > 1) {
        shader_addline(arg->buffer, "vec%u(dot(T%u.xyz, %s)));\n", mask_size, dstreg, src0_param.param_str);
    } else {
        shader_addline(arg->buffer, "dot(T%u.xyz, %s));\n", dstreg, src0_param.param_str);
    }
}

1617
/** Process the WINED3DSIO_TEXDEPTH instruction in GLSL:
 * Write dst.x / dst.y to gl_FragDepth. */
void pshader_glsl_texdepth(SHADER_OPCODE_ARG* arg) {
    glsl_dst_param_t depth_reg;

    /* Only the register name is needed; the write mask is not used. */
    shader_glsl_add_dst_param(arg, arg->dst, 0, &depth_reg);

    shader_addline(arg->buffer, "gl_FragDepth = %s.x / %s.y;\n", depth_reg.reg_name, depth_reg.reg_name);
}

1627
/** Process the WINED3DSIO_TEXM3X2DEPTH instruction in GLSL:
 * Last row of a 3x2 matrix multiply, use the result to calculate the depth:
 * Calculate tmp0.y = TexCoord[dstreg] . src.xyz;  (tmp0.x has already been calculated)
 * depth = (tmp0.y == 0.0) ? 1.0 : tmp0.x / tmp0.y
 */
void pshader_glsl_texm3x2depth(SHADER_OPCODE_ARG* arg) {
    const DWORD xyz_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
    DWORD stage = arg->dst & WINED3DSP_REGNUM_MASK;
    glsl_src_param_t row;

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], xyz_mask, &row);

    shader_addline(arg->buffer, "tmp0.y = dot(T%u.xyz, %s);\n", stage, row.param_str);
    /* The quotient is clamped to [0, 1]; a zero divisor yields 1.0. */
    shader_addline(arg->buffer, "gl_FragDepth = (tmp0.y == 0.0) ? 1.0 : clamp(tmp0.x / tmp0.y, 0.0, 1.0);\n");
}

1643
/** Process the WINED3DSIO_TEXM3X2PAD instruction in GLSL
1644
 * Calculate the 1st of a 2-row matrix multiplication. */
1645
void pshader_glsl_texm3x2pad(SHADER_OPCODE_ARG* arg) {
1646
    DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1647
    DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
1648
    SHADER_BUFFER* buffer = arg->buffer;
1649
    glsl_src_param_t src0_param;
1650

1651 1652
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
    shader_addline(buffer, "tmp0.x = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
1653
}
1654

1655
/** Process the WINED3DSIO_TEXM3X3PAD instruction in GLSL
 * Calculate the 1st or 2nd row of a 3-row matrix multiplication.
 * The row index is taken from the shader's parse state, which is advanced
 * after the register number has been recorded in texcoord_w[]. */
void pshader_glsl_texm3x3pad(SHADER_OPCODE_ARG* arg) {
    IWineD3DPixelShaderImpl* shader = (IWineD3DPixelShaderImpl*) arg->shader;
    SHADER_PARSE_STATE* state = &shader->baseShader.parse_state;
    const DWORD xyz_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
    DWORD stage = arg->dst & WINED3DSP_REGNUM_MASK;
    glsl_src_param_t row;

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], xyz_mask, &row);

    /* Row 0 is stored in tmp0.x, row 1 in tmp0.y. */
    shader_addline(arg->buffer, "tmp0.%c = dot(T%u.xyz, %s);\n", 'x' + state->current_row, stage, row.param_str);

    /* Remember which register supplied this row, then move to the next row. */
    state->texcoord_w[state->current_row] = stage;
    state->current_row++;
}

/** Process the WINED3DSIO_TEXM3X2TEX instruction in GLSL
 * Calculate the second row of a 2-row matrix multiplication into tmp0.y,
 * then sample the 2D texture of the destination stage at tmp0.xy. */
void pshader_glsl_texm3x2tex(SHADER_OPCODE_ARG* arg) {
    const DWORD xyz_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
    DWORD stage = arg->dst & WINED3DSP_REGNUM_MASK;
    glsl_src_param_t row;
    char dst_mask[6];

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], xyz_mask, &row);
    shader_addline(arg->buffer, "tmp0.y = dot(T%u.xyz, %s);\n", stage, row.param_str);

    shader_glsl_append_dst(arg->buffer, arg);
    shader_glsl_get_write_mask(arg->dst, dst_mask);

    /* Sample the texture using the calculated coordinates */
    shader_addline(arg->buffer, "texture2D(Psampler%u, tmp0.xy)%s);\n", stage, dst_mask);
}
1687

1688
/** Process the WINED3DSIO_TEXM3X3TEX instruction in GLSL
1689
 * Perform the 3rd row of a 3x3 matrix multiply, then sample the texture using the calculated coordinates */
1690
void pshader_glsl_texm3x3tex(SHADER_OPCODE_ARG* arg) {
1691
    DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1692
    glsl_src_param_t src0_param;
1693
    char dst_mask[6];
1694
    DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
1695 1696
    IWineD3DPixelShaderImpl* This = (IWineD3DPixelShaderImpl*) arg->shader;
    SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;
1697 1698
    DWORD sampler_type = arg->reg_maps->samplers[reg] & WINED3DSP_TEXTURETYPE_MASK;
    glsl_sample_function_t sample_function;
1699

1700 1701
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);
    shader_addline(arg->buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
1702 1703 1704 1705

    shader_glsl_append_dst(arg->buffer, arg);
    shader_glsl_get_write_mask(arg->dst, dst_mask);
    shader_glsl_get_sample_function(sampler_type, FALSE, &sample_function);
1706 1707

    /* Sample the texture using the calculated coordinates */
1708 1709
    shader_addline(arg->buffer, "%s(Psampler%u, tmp0.xyz)%s);\n", sample_function.name, reg, dst_mask);

1710 1711 1712
    current_state->current_row = 0;
}

1713
/** Process the WINED3DSIO_TEXM3X3 instruction in GLSL
 * Perform the 3rd row of a 3x3 matrix multiply and write the resulting
 * vector (rows 1 and 2 from tmp0.xy, row 3 computed here, w = 1.0) to the
 * destination. The row counter in the parse state is reset afterwards. */
void pshader_glsl_texm3x3(SHADER_OPCODE_ARG* arg) {
    DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
    glsl_src_param_t src0_param;
    char dst_mask[6];
    DWORD reg = arg->dst & WINED3DSP_REGNUM_MASK;
    IWineD3DPixelShaderImpl* This = (IWineD3DPixelShaderImpl*) arg->shader;
    SHADER_PARSE_STATE* current_state = &This->baseShader.parse_state;

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], src_mask, &src0_param);

    shader_glsl_append_dst(arg->buffer, arg);
    shader_glsl_get_write_mask(arg->dst, dst_mask);
    /* The preceding texm3x3pad instructions stored the first two rows in
     * tmp0.x and tmp0.y, so they must be read back from tmp0, not "tmp". */
    shader_addline(arg->buffer, "vec4(tmp0.xy, dot(T%u.xyz, %s), 1.0)%s);\n", reg, src0_param.param_str, dst_mask);

    current_state->current_row = 0;
}

1732
/** Process the WINED3DSIO_TEXM3X3SPEC instruction in GLSL
 * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies.
 * The second source supplies the vector that is reflected about tmp0. */
void pshader_glsl_texm3x3spec(SHADER_OPCODE_ARG* arg) {
    IWineD3DPixelShaderImpl* shader = (IWineD3DPixelShaderImpl*) arg->shader;
    SHADER_PARSE_STATE* state = &shader->baseShader.parse_state;
    SHADER_BUFFER* out = arg->buffer;
    const DWORD xyz_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
    DWORD stage = arg->dst & WINED3DSP_REGNUM_MASK;
    DWORD sampler_type = arg->reg_maps->samplers[stage] & WINED3DSP_TEXTURETYPE_MASK;
    glsl_sample_function_t sample_function;
    glsl_src_param_t row;
    glsl_src_param_t eye;
    char dst_mask[6];

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], xyz_mask, &row);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], xyz_mask, &eye);

    /* Perform the last matrix multiply operation */
    shader_addline(out, "tmp0.z = dot(T%u.xyz, %s);\n", stage, row.param_str);

    /* Calculate reflection vector, 2*(tmp0.src1)*tmp0-src1
     * This is equivalent to reflect(-src1, tmp0); */
    shader_addline(out, "tmp0.xyz = reflect(-(%s), tmp0.xyz);\n", eye.param_str);

    shader_glsl_append_dst(out, arg);
    shader_glsl_get_write_mask(arg->dst, dst_mask);
    shader_glsl_get_sample_function(sampler_type, FALSE, &sample_function);

    /* Sample the texture */
    shader_addline(out, "%s(Psampler%u, tmp0.xyz)%s);\n", sample_function.name, stage, dst_mask);

    state->current_row = 0;
}

1767
/** Process the WINED3DSIO_TEXM3X3VSPEC instruction in GLSL
 * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies.
 * The eye-ray vector is built from the w components of the texture
 * coordinates recorded by the two preceding pad instructions and of the
 * destination stage. */
void pshader_glsl_texm3x3vspec(SHADER_OPCODE_ARG* arg) {
    IWineD3DPixelShaderImpl* shader = (IWineD3DPixelShaderImpl*) arg->shader;
    SHADER_PARSE_STATE* state = &shader->baseShader.parse_state;
    SHADER_BUFFER* out = arg->buffer;
    const DWORD xyz_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
    DWORD stage = arg->dst & WINED3DSP_REGNUM_MASK;
    DWORD sampler_type = arg->reg_maps->samplers[stage] & WINED3DSP_TEXTURETYPE_MASK;
    glsl_sample_function_t sample_function;
    glsl_src_param_t row;
    char dst_mask[6];

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], xyz_mask, &row);

    /* Perform the last matrix multiply operation */
    shader_addline(out, "tmp0.z = dot(vec3(T%u), vec3(%s));\n", stage, row.param_str);

    /* Construct the eye-ray vector from w coordinates */
    shader_addline(out, "tmp1.xyz = normalize(vec3(gl_TexCoord[%u].w, gl_TexCoord[%u].w, gl_TexCoord[%u].w));\n",
            state->texcoord_w[0], state->texcoord_w[1], stage);

    /* Calculate reflection vector (Assume normal is normalized): RF = 2*(tmp0.tmp1)*tmp0-tmp1
     * This is equivalent to reflect(-tmp1, tmp0); */
    shader_addline(out, "tmp0.xyz = reflect(-tmp1.xyz, tmp0.xyz);\n");

    shader_glsl_append_dst(out, arg);
    shader_glsl_get_write_mask(arg->dst, dst_mask);
    shader_glsl_get_sample_function(sampler_type, FALSE, &sample_function);

    /* Sample the texture using the calculated coordinates */
    shader_addline(out, "%s(Psampler%u, tmp0.xyz)%s);\n", sample_function.name, stage, dst_mask);

    state->current_row = 0;
}

1804
/** Process the WINED3DSIO_TEXBEM instruction in GLSL.
1805
 * Apply a fake bump map transform.
1806 1807
 * texbem is pshader <= 1.3 only, this saves a few version checks
 */
1808
void pshader_glsl_texbem(SHADER_OPCODE_ARG* arg) {
1809 1810 1811 1812
    IWineD3DPixelShaderImpl* This = (IWineD3DPixelShaderImpl*) arg->shader;
    IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
    char dst_swizzle[6];
    glsl_sample_function_t sample_function;
1813
    glsl_src_param_t coord_param;
1814 1815
    DWORD sampler_type;
    DWORD sampler_idx;
1816
    DWORD mask;
1817 1818 1819 1820 1821 1822
    DWORD flags;
    char coord_mask[6];

    sampler_idx = arg->dst & WINED3DSP_REGNUM_MASK;
    flags = deviceImpl->stateBlock->textureState[sampler_idx][WINED3DTSS_TEXTURETRANSFORMFLAGS];

1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833
    sampler_type = arg->reg_maps->samplers[sampler_idx] & WINED3DSP_TEXTURETYPE_MASK;
    shader_glsl_get_sample_function(sampler_type, FALSE, &sample_function);
    mask = sample_function.coord_mask;

    shader_glsl_get_write_mask(arg->dst, dst_swizzle);

    shader_glsl_get_write_mask(mask, coord_mask);

    /* with projective textures, texbem only divides the static texture coord, not the displacement,
         * so we can't let the GL handle this.
         */
1834
    if (flags & WINED3DTTFF_PROJECTED) {
1835 1836
        DWORD div_mask=0;
        char coord_div_mask[3];
1837 1838
        switch (flags & ~WINED3DTTFF_PROJECTED) {
            case WINED3DTTFF_COUNT1: FIXME("WINED3DTTFF_PROJECTED with WINED3DTTFF_COUNT1?\n"); break;
1839 1840
            case WINED3DTTFF_COUNT2: div_mask = WINED3DSP_WRITEMASK_1; break;
            case WINED3DTTFF_COUNT3: div_mask = WINED3DSP_WRITEMASK_2; break;
1841
            case WINED3DTTFF_COUNT4:
1842
            case WINED3DTTFF_DISABLE: div_mask = WINED3DSP_WRITEMASK_3; break;
1843
        }
1844 1845
        shader_glsl_get_write_mask(div_mask, coord_div_mask);
        shader_addline(arg->buffer, "T%u%s /= T%u%s;\n", sampler_idx, coord_mask, sampler_idx, coord_div_mask);
1846 1847
    }

1848
    shader_glsl_append_dst(arg->buffer, arg);
1849 1850 1851
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0|WINED3DSP_WRITEMASK_1, &coord_param);
    shader_addline(arg->buffer, "%s(Psampler%u, T%u%s + vec4(bumpenvmat * %s, 0.0, 0.0)%s )%s);\n",
                   sample_function.name, sampler_idx, sampler_idx, coord_mask, coord_param.param_str, coord_mask, dst_swizzle);
1852 1853
}

1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864
/** Process the bem instruction in GLSL:
 * dst = src0 + bumpenvmat * src1, using the first two components of both sources. */
void pshader_glsl_bem(SHADER_OPCODE_ARG* arg) {
    const DWORD xy_mask = WINED3DSP_WRITEMASK_0|WINED3DSP_WRITEMASK_1;
    glsl_src_param_t base;
    glsl_src_param_t offset;

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], xy_mask, &base);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], xy_mask, &offset);

    shader_glsl_append_dst(arg->buffer, arg);
    shader_addline(arg->buffer, "%s + bumpenvmat * %s);\n",
                   base.param_str, offset.param_str);
}

1865
/** Process the WINED3DSIO_TEXREG2AR instruction in GLSL
 * Sample 2D texture at dst using the alpha & red (wx) components of src as texture coordinates */
void pshader_glsl_texreg2ar(SHADER_OPCODE_ARG* arg) {
    DWORD stage = arg->dst & WINED3DSP_REGNUM_MASK;
    glsl_src_param_t coords;
    char dst_mask[6];

    shader_glsl_append_dst(arg->buffer, arg);
    shader_glsl_get_write_mask(arg->dst, dst_mask);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_ALL, &coords);

    /* Only the register name is used; the swizzle is fixed to .wx here. */
    shader_addline(arg->buffer, "texture2D(Psampler%u, %s.wx)%s);\n", stage, coords.reg_name, dst_mask);
}

1879
/** Process the WINED3DSIO_TEXREG2GB instruction in GLSL
 * Sample 2D texture at dst using the green & blue (yz) components of src as texture coordinates */
void pshader_glsl_texreg2gb(SHADER_OPCODE_ARG* arg) {
    DWORD stage = arg->dst & WINED3DSP_REGNUM_MASK;
    glsl_src_param_t coords;
    char dst_mask[6];

    shader_glsl_append_dst(arg->buffer, arg);
    shader_glsl_get_write_mask(arg->dst, dst_mask);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_ALL, &coords);

    /* Only the register name is used; the swizzle is fixed to .yz here. */
    shader_addline(arg->buffer, "texture2D(Psampler%u, %s.yz)%s);\n", stage, coords.reg_name, dst_mask);
}

1893
/** Process the WINED3DSIO_TEXREG2RGB instruction in GLSL
 * Sample texture at dst using the rgb (xyz) components of src as texture coordinates.
 * The components actually read are those in the sample function's coord_mask. */
void pshader_glsl_texreg2rgb(SHADER_OPCODE_ARG* arg) {
    DWORD stage = arg->dst & WINED3DSP_REGNUM_MASK;
    DWORD sampler_type = arg->reg_maps->samplers[stage] & WINED3DSP_TEXTURETYPE_MASK;
    glsl_sample_function_t sample_function;
    glsl_src_param_t coords;
    char dst_mask[6];

    shader_glsl_append_dst(arg->buffer, arg);
    shader_glsl_get_write_mask(arg->dst, dst_mask);
    shader_glsl_get_sample_function(sampler_type, FALSE, &sample_function);
    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], sample_function.coord_mask, &coords);

    shader_addline(arg->buffer, "%s(Psampler%u, %s)%s);\n", sample_function.name, stage, coords.param_str, dst_mask);
}

1910
/** Process the WINED3DSIO_TEXKILL instruction in GLSL.
 * If any of the first 3 components are < 0, discard this pixel */
void pshader_glsl_texkill(SHADER_OPCODE_ARG* arg) {
    glsl_dst_param_t kill_reg;

    /* Only the register name is needed; .xyz is always tested. */
    shader_glsl_add_dst_param(arg, arg->dst, 0, &kill_reg);

    shader_addline(arg->buffer, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;\n", kill_reg.reg_name);
}

1919
/** Process the WINED3DSIO_DP2ADD instruction in GLSL.
 * dst = dot2(src0, src1) + src2
 * The scalar result is replicated with a vecN() constructor when the
 * destination write mask selects more than one component. */
void pshader_glsl_dp2add(SHADER_OPCODE_ARG* arg) {
    glsl_src_param_t src0_param;
    glsl_src_param_t src1_param;
    glsl_src_param_t src2_param;
    DWORD write_mask;
    /* unsigned int so it matches the %u conversion used below. */
    unsigned int mask_size;

    write_mask = shader_glsl_append_dst(arg->buffer, arg);
    mask_size = (unsigned int)shader_glsl_get_write_mask_size(write_mask);

    shader_glsl_add_src_param(arg, arg->src[0], arg->src_addr[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
    shader_glsl_add_src_param(arg, arg->src[1], arg->src_addr[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
    shader_glsl_add_src_param(arg, arg->src[2], arg->src_addr[2], WINED3DSP_WRITEMASK_0, &src2_param);

    if (mask_size > 1) {
        /* A float cannot be assigned to a vector in GLSL; widen it explicitly. */
        shader_addline(arg->buffer, "vec%u(dot(%s, %s) + %s));\n",
                mask_size, src0_param.param_str, src1_param.param_str, src2_param.param_str);
    } else {
        shader_addline(arg->buffer, "dot(%s, %s) + %s);\n",
                src0_param.param_str, src1_param.param_str, src2_param.param_str);
    }
}

1938 1939
void pshader_glsl_input_pack(
   SHADER_BUFFER* buffer,
1940
   semantic* semantics_in) {
1941 1942 1943

   unsigned int i;

1944
   for (i = 0; i < MAX_REG_INPUT; i++) {
1945

1946 1947 1948
       DWORD usage_token = semantics_in[i].usage;
       DWORD register_token = semantics_in[i].reg;
       DWORD usage, usage_idx;
1949 1950 1951
       char reg_mask[6];

       /* Uninitialized */
1952
       if (!usage_token) continue;
1953 1954
       usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
       usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
1955
       shader_glsl_get_write_mask(register_token, reg_mask);
1956 1957 1958

       switch(usage) {

1959
           case WINED3DDECLUSAGE_COLOR:
1960
               if (usage_idx == 0)
1961
                   shader_addline(buffer, "IN%u%s = vec4(gl_Color)%s;\n",
1962
                       i, reg_mask, reg_mask);
1963
               else if (usage_idx == 1)
1964
                   shader_addline(buffer, "IN%u%s = vec4(gl_SecondaryColor)%s;\n",
1965 1966
                       i, reg_mask, reg_mask);
               else
1967
                   shader_addline(buffer, "IN%u%s = vec4(unsupported_color_input)%s;\n",
1968
                       i, reg_mask, reg_mask);
1969 1970
               break;

1971
           case WINED3DDECLUSAGE_TEXCOORD:
1972
               shader_addline(buffer, "IN%u%s = vec4(gl_TexCoord[%u])%s;\n",
1973
                   i, reg_mask, usage_idx, reg_mask );
1974 1975
               break;

1976
           case WINED3DDECLUSAGE_FOG:
1977
               shader_addline(buffer, "IN%u%s = vec4(gl_FogFragCoord)%s;\n",
1978
                   i, reg_mask, reg_mask);
1979 1980 1981
               break;

           default:
1982
               shader_addline(buffer, "IN%u%s = vec4(unsupported_input)%s;\n",
1983
                   i, reg_mask, reg_mask);
1984 1985 1986 1987 1988 1989 1990 1991 1992 1993
        }
    }
}

/*********************************************
 * Vertex Shader Specific Code begins here
 ********************************************/

void vshader_glsl_output_unpack(
   SHADER_BUFFER* buffer,
1994
   semantic* semantics_out) {
1995 1996 1997

   unsigned int i;

1998
   for (i = 0; i < MAX_REG_OUTPUT; i++) {
1999

2000 2001 2002
       DWORD usage_token = semantics_out[i].usage;
       DWORD register_token = semantics_out[i].reg;
       DWORD usage, usage_idx;
2003 2004 2005
       char reg_mask[6];

       /* Uninitialized */
2006
       if (!usage_token) continue;
2007

2008 2009
       usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
       usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
2010
       shader_glsl_get_write_mask(register_token, reg_mask);
2011

2012
       switch(usage) {
2013

2014
           case WINED3DDECLUSAGE_COLOR:
2015
               if (usage_idx == 0)
2016
                   shader_addline(buffer, "gl_FrontColor%s = OUT%u%s;\n", reg_mask, i, reg_mask);
2017
               else if (usage_idx == 1)
2018
                   shader_addline(buffer, "gl_FrontSecondaryColor%s = OUT%u%s;\n", reg_mask, i, reg_mask);
2019
               else
2020
                   shader_addline(buffer, "unsupported_color_output%s = OUT%u%s;\n", reg_mask, i, reg_mask);
2021 2022
               break;

2023
           case WINED3DDECLUSAGE_POSITION:
2024
               shader_addline(buffer, "gl_Position%s = OUT%u%s;\n", reg_mask, i, reg_mask);
2025
               break;
2026 2027

           case WINED3DDECLUSAGE_TEXCOORD:
2028
               shader_addline(buffer, "gl_TexCoord[%u]%s = OUT%u%s;\n",
2029
                   usage_idx, reg_mask, i, reg_mask);
2030 2031
               break;

2032
           case WINED3DDECLUSAGE_PSIZE:
2033
               shader_addline(buffer, "gl_PointSize = OUT%u.x;\n", i);
2034 2035
               break;

2036
           case WINED3DDECLUSAGE_FOG:
2037
               shader_addline(buffer, "gl_FogFragCoord = OUT%u%s;\n", i, reg_mask);
2038 2039 2040
               break;

           default:
2041
               shader_addline(buffer, "unsupported_output%s = OUT%u%s;\n", reg_mask, i, reg_mask);
2042 2043
       }
    }
2044
}
2045

2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066
/** Register a linked program in the device's program lookup table, keyed
 * by the (vertex shader, pixel shader) handle pair stored in the entry.
 * NOTE(review): the HeapAlloc result is used without a NULL check. */
static void add_glsl_program_entry(IWineD3DDeviceImpl *device, struct glsl_shader_prog_link *entry) {
    glsl_program_key_t *new_key = HeapAlloc(GetProcessHeap(), 0, sizeof(*new_key));

    new_key->vshader = entry->vshader;
    new_key->pshader = entry->pshader;

    hash_table_put(device->glsl_program_lookup, new_key, entry);
}

/** Look up the linked program for a (vertex shader, pixel shader) handle
 * pair in the device's program lookup table and return whatever
 * hash_table_get yields for that key. */
static struct glsl_shader_prog_link *get_glsl_program_entry(IWineD3DDeviceImpl *device,
        GLhandleARB vshader, GLhandleARB pshader) {
    glsl_program_key_t lookup_key;

    lookup_key.pshader = pshader;
    lookup_key.vshader = vshader;

    return (struct glsl_shader_prog_link *)hash_table_get(device->glsl_program_lookup, &lookup_key);
}

/** Destroy a linked program: drop it from the device's program lookup
 * table, delete the GL program object, unlink the entry from the shader
 * lists it belongs to and release its memory.
 * NOTE(review): the lookup key is heap-allocated and not freed here;
 * presumably hash_table_remove takes ownership of it -- confirm. */
void delete_glsl_program_entry(IWineD3DDevice *iface, struct glsl_shader_prog_link *entry) {
    IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
    WineD3D_GL_Info *gl_info = &((IWineD3DImpl *)(This->wineD3D))->gl_info;
    glsl_program_key_t *lookup_key = HeapAlloc(GetProcessHeap(), 0, sizeof(*lookup_key));

    lookup_key->vshader = entry->vshader;
    lookup_key->pshader = entry->pshader;
    hash_table_remove(This->glsl_program_lookup, lookup_key);

    GL_EXTCALL(glDeleteObjectARB(entry->programId));

    /* The entry is only linked into a shader's list when that shader is set. */
    if (entry->vshader) {
        list_remove(&entry->vshader_entry);
    }
    if (entry->pshader) {
        list_remove(&entry->pshader_entry);
    }

    HeapFree(GetProcessHeap(), 0, entry->vuniformF_locations);
    HeapFree(GetProcessHeap(), 0, entry->puniformF_locations);
    HeapFree(GetProcessHeap(), 0, entry);
}

/** Sets the GLSL program ID for the given pixel and vertex shader combination.
 * It sets the programId on the current StateBlock (because it should be called
 * inside of the DrawPrimitive() part of the render loop).
 *
 * If a program for the given combination does not exist, create one, and store
2089 2090
 * the program in the hash table.  If it creates a program, it will link the
 * given objects, too.
2091
 */
2092
static void set_glsl_shader_program(IWineD3DDevice *iface, BOOL use_ps, BOOL use_vs) {
2093 2094 2095 2096
    IWineD3DDeviceImpl *This               = (IWineD3DDeviceImpl *)iface;
    WineD3D_GL_Info *gl_info               = &((IWineD3DImpl *)(This->wineD3D))->gl_info;
    IWineD3DPixelShader  *pshader          = This->stateBlock->pixelShader;
    IWineD3DVertexShader *vshader          = This->stateBlock->vertexShader;
2097
    struct glsl_shader_prog_link *entry    = NULL;
2098 2099 2100 2101
    GLhandleARB programId                  = 0;
    int i;
    char glsl_name[8];

2102 2103
    GLhandleARB vshader_id = use_vs ? ((IWineD3DBaseShaderImpl*)vshader)->baseShader.prgId : 0;
    GLhandleARB pshader_id = use_ps ? ((IWineD3DBaseShaderImpl*)pshader)->baseShader.prgId : 0;
2104 2105 2106 2107
    entry = get_glsl_program_entry(This, vshader_id, pshader_id);
    if (entry) {
        This->stateBlock->glsl_program = entry;
        return;
2108 2109 2110 2111 2112 2113
    }

    /* If we get to this point, then no matching program exists, so we create one */
    programId = GL_EXTCALL(glCreateProgramObjectARB());
    TRACE("Created new GLSL shader program %u\n", programId);

2114 2115 2116 2117 2118 2119 2120 2121 2122 2123
    /* Create the entry */
    entry = HeapAlloc(GetProcessHeap(), 0, sizeof(struct glsl_shader_prog_link));
    entry->programId = programId;
    entry->vshader = vshader_id;
    entry->pshader = pshader_id;
    /* Add the hash table entry */
    add_glsl_program_entry(This, entry);

    /* Set the current program */
    This->stateBlock->glsl_program = entry;
2124 2125

    /* Attach GLSL vshader */
2126
    if (vshader_id) {
2127 2128 2129
        int max_attribs = 16;   /* TODO: Will this always be the case? It is at the moment... */
        char tmp_name[10];

2130 2131 2132
        TRACE("Attaching GLSL shader object %u to program %u\n", vshader_id, programId);
        GL_EXTCALL(glAttachObjectARB(programId, vshader_id));
        checkGLcall("glAttachObjectARB");
2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147

        /* Bind vertex attributes to a corresponding index number to match
         * the same index numbers as ARB_vertex_programs (makes loading
         * vertex attributes simpler).  With this method, we can use the
         * exact same code to load the attributes later for both ARB and
         * GLSL shaders.
         *
         * We have to do this here because we need to know the Program ID
         * in order to make the bindings work, and it has to be done prior
         * to linking the GLSL program. */
        for (i = 0; i < max_attribs; ++i) {
             snprintf(tmp_name, sizeof(tmp_name), "attrib%i", i);
             GL_EXTCALL(glBindAttribLocationARB(programId, i, tmp_name));
        }
        checkGLcall("glBindAttribLocationARB");
2148 2149

        list_add_head(&((IWineD3DBaseShaderImpl *)vshader)->baseShader.linked_programs, &entry->vshader_entry);
2150 2151 2152
    }

    /* Attach GLSL pshader */
2153 2154 2155 2156 2157 2158
    if (pshader_id) {
        TRACE("Attaching GLSL shader object %u to program %u\n", pshader_id, programId);
        GL_EXTCALL(glAttachObjectARB(programId, pshader_id));
        checkGLcall("glAttachObjectARB");

        list_add_head(&((IWineD3DBaseShaderImpl *)pshader)->baseShader.linked_programs, &entry->pshader_entry);
2159 2160 2161 2162 2163 2164 2165
    }

    /* Link the program */
    TRACE("Linking GLSL shader program %u\n", programId);
    GL_EXTCALL(glLinkProgramARB(programId));
    print_glsl_info_log(&GLINFO_LOCATION, programId);

2166
    entry->vuniformF_locations = HeapAlloc(GetProcessHeap(), 0, sizeof(GLhandleARB) * GL_LIMITS(vshader_constantsF));
2167 2168
    for (i = 0; i < GL_LIMITS(vshader_constantsF); ++i) {
        snprintf(glsl_name, sizeof(glsl_name), "VC[%i]", i);
2169
        entry->vuniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
2170
    }
2171
    entry->puniformF_locations = HeapAlloc(GetProcessHeap(), 0, sizeof(GLhandleARB) * GL_LIMITS(pshader_constantsF));
2172 2173
    for (i = 0; i < GL_LIMITS(pshader_constantsF); ++i) {
        snprintf(glsl_name, sizeof(glsl_name), "PC[%i]", i);
2174
        entry->puniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
2175 2176 2177
    }
}

2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221
/* Builds and links the GLSL program used for depth blits and returns its
 * handle.
 *
 * The vertex shader passes gl_Vertex through as the position and derives
 * texture coordinate 0 from the vertex x/y, scaled and biased by 0.5 with y
 * negated. The fragment shader writes the x component of the texel fetched
 * from "sampler" to gl_FragDepth.
 *
 * The link info log is printed through print_glsl_info_log(). */
static GLhandleARB create_glsl_blt_shader(WineD3D_GL_Info *gl_info) {
    GLhandleARB program_id;
    GLhandleARB vshader_id, pshader_id;
    const char *blt_vshader[] = {
        "void main(void)\n"
        "{\n"
        "    gl_Position = gl_Vertex;\n"
        "    gl_FrontColor = vec4(1.0);\n"
        "    gl_TexCoord[0].x = (gl_Vertex.x * 0.5) + 0.5;\n"
        "    gl_TexCoord[0].y = (-gl_Vertex.y * 0.5) + 0.5;\n"
        "}\n"
    };

    const char *blt_pshader[] = {
        "uniform sampler2D sampler;\n"
        "void main(void)\n"
        "{\n"
        "    gl_FragDepth = texture2D(sampler, gl_TexCoord[0].xy).x;\n"
        "}\n"
    };

    vshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
    GL_EXTCALL(glShaderSourceARB(vshader_id, 1, blt_vshader, NULL));
    GL_EXTCALL(glCompileShaderARB(vshader_id));

    pshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
    GL_EXTCALL(glShaderSourceARB(pshader_id, 1, blt_pshader, NULL));
    GL_EXTCALL(glCompileShaderARB(pshader_id));

    program_id = GL_EXTCALL(glCreateProgramObjectARB());
    GL_EXTCALL(glAttachObjectARB(program_id, vshader_id));
    GL_EXTCALL(glAttachObjectARB(program_id, pshader_id));
    GL_EXTCALL(glLinkProgramARB(program_id));

    print_glsl_info_log(&GLINFO_LOCATION, program_id);

    /* The shader handles are not kept anywhere, so flag the objects for
     * deletion now. Objects that are still attached to a program are only
     * flagged and get deleted once they are no longer attached, so the linked
     * program stays usable. */
    GL_EXTCALL(glDeleteObjectARB(vshader_id));
    GL_EXTCALL(glDeleteObjectARB(pshader_id));

    return program_id;
}

/* Makes the GLSL program for the requested shader stages current. When
 * neither stage is requested the stateblock's program is cleared and program
 * object 0 is selected instead. */
static void shader_glsl_select(IWineD3DDevice *iface, BOOL usePS, BOOL useVS) {
    IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)iface;
    WineD3D_GL_Info *gl_info = &((IWineD3DImpl *)(device->wineD3D))->gl_info;
    GLhandleARB active_program = 0;

    if (!useVS && !usePS) {
        device->stateBlock->glsl_program = NULL;
    } else {
        /* Finds or creates the program and stores it in the stateblock */
        set_glsl_shader_program(iface, usePS, useVS);
    }

    if (device->stateBlock->glsl_program) {
        active_program = device->stateBlock->glsl_program->programId;
    }

    if (active_program) {
        TRACE("Using GLSL program %u\n", active_program);
    }
    GL_EXTCALL(glUseProgramObjectARB(active_program));
    checkGLcall("glUseProgramObjectARB");
}

/* Makes the depth blit program current and points its "sampler" uniform at
 * texture unit 0.
 *
 * The program and the uniform location are created on the first call and
 * cached in function-level statics, so they are shared by every caller of
 * this function. */
static void shader_glsl_select_depth_blt(IWineD3DDevice *iface) {
    IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
    WineD3D_GL_Info *gl_info = &((IWineD3DImpl *)(This->wineD3D))->gl_info;
    static GLhandleARB program_id = 0;
    /* Uniform locations are signed (GLint), with -1 meaning "no such
     * uniform"; an object handle type is the wrong type to hold one. */
    static GLint loc = -1;

    if (!program_id) {
        program_id = create_glsl_blt_shader(gl_info);
        loc = GL_EXTCALL(glGetUniformLocationARB(program_id, "sampler"));
    }

    GL_EXTCALL(glUseProgramObjectARB(program_id));
    GL_EXTCALL(glUniform1iARB(loc, 0));
}

2246 2247 2248 2249
/* Backend cleanup hook: selects program object 0, so that no GLSL program
 * remains current. */
static void shader_glsl_cleanup(IWineD3DDevice *iface) {
    IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)iface;
    WineD3D_GL_Info *gl_info = &((IWineD3DImpl *)(device->wineD3D))->gl_info;

    GL_EXTCALL(glUseProgramObjectARB(0));
}

/* Function table exposing the GLSL implementation as a shader backend. The
 * initializers are positional, so their order has to follow the member order
 * of shader_backend_t. */
const shader_backend_t glsl_shader_backend = {
    shader_glsl_select,             /* select the program for the active shaders */
    shader_glsl_select_depth_blt,   /* select the depth blit program */
    shader_glsl_load_constants,     /* constant loading, defined elsewhere in this file */
    shader_glsl_cleanup             /* unbind any GLSL program */
};