arb_program_shader.c 336 KB
Newer Older
1 2 3 4 5 6
/*
 * Pixel and vertex shaders implementation using ARB_vertex_program
 * and ARB_fragment_program GL extensions.
 *
 * Copyright 2002-2003 Jason Edmeades
 * Copyright 2002-2003 Raphael Junqueira
 * Copyright 2004 Christian Costa
 * Copyright 2005 Oliver Stieber
 * Copyright 2006 Ivan Gyurdiev
 * Copyright 2006 Jason Green
 * Copyright 2006 Henri Verbeet
 * Copyright 2007-2011, 2013-2014 Stefan Dösinger for CodeWeavers
 * Copyright 2009 Henri Verbeet for CodeWeavers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include "config.h"
31
#include "wine/port.h"
32 33 34 35 36 37

#include <stdio.h>

#include "wined3d_private.h"

WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
38
WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
39
WINE_DECLARE_DEBUG_CHANNEL(d3d);
40
WINE_DECLARE_DEBUG_CHANNEL(d3d_perf);
41

42 43 44 45 46 47 48 49 50 51
static BOOL shader_is_pshader_version(enum wined3d_shader_type type)
{
    return type == WINED3D_SHADER_TYPE_PIXEL;
}

static BOOL shader_is_vshader_version(enum wined3d_shader_type type)
{
    return type == WINED3D_SHADER_TYPE_VERTEX;
}

52
/* Line iterator over a NUL-terminated text buffer.
 *
 * Returns a pointer to the start of the next line in *ptr and advances *ptr
 * to the character following that line's '\n' (or to the terminating NUL if
 * the last line has no trailing newline). The returned line is NOT
 * NUL-terminated at the newline; it still points into the caller's buffer.
 *
 * Returns NULL once *ptr points at the terminating NUL, i.e. when no input
 * is left. */
static const char *get_line(const char **ptr)
{
    const char *line = *ptr;
    const char *newline = strchr(line, '\n');

    if (newline)
    {
        *ptr = newline + 1;
        return line;
    }

    /* No newline left: either a final unterminated line or the end of input. */
    if (!*line)
        return NULL;

    *ptr = line + strlen(line);
    return line;
}

68 69 70 71 72 73
/* Symbolic names for the helper values a generated ARB program may need.
 * arb_get_helper_value() maps each of them to the program text that yields
 * the value for the shader type being generated. */
enum arb_helper_value
{
    ARB_ZERO,           /* 0.0 */
    ARB_ONE,            /* 1.0 */
    ARB_TWO,            /* 2.0 */
    ARB_0001,           /* {0.0, 0.0, 0.0, 1.0} */
    ARB_EPS,            /* small epsilon; the literal fallback is 1e-8 */

    ARB_VS_REL_OFFSET   /* only mapped for vertex shaders (rel_addr_const.y) */
};

static const char *arb_get_helper_value(enum wined3d_shader_type shader, enum arb_helper_value value)
{
81
    if (shader != WINED3D_SHADER_TYPE_VERTEX && shader != WINED3D_SHADER_TYPE_PIXEL)
82
    {
83
        ERR("Unsupported shader type '%s'.\n", debug_shader_type(shader));
84 85 86 87 88 89 90
        return "bad";
    }

    if (shader == WINED3D_SHADER_TYPE_PIXEL)
    {
        switch (value)
        {
91 92
            case ARB_ZERO: return "ps_helper_const.x";
            case ARB_ONE: return "ps_helper_const.y";
93
            case ARB_TWO: return "coefmul.x";
94
            case ARB_0001: return "ps_helper_const.xxxy";
95
            case ARB_EPS: return "ps_helper_const.z";
96 97 98 99 100 101 102
            default: break;
        }
    }
    else
    {
        switch (value)
        {
103
            case ARB_ZERO: return "helper_const.x";
104
            case ARB_ONE: return "helper_const.y";
105
            case ARB_TWO: return "helper_const.z";
106
            case ARB_EPS: return "helper_const.w";
107
            case ARB_0001: return "helper_const.xxxy";
108
            case ARB_VS_REL_OFFSET: return "rel_addr_const.y";
109 110
        }
    }
111
    FIXME("Unmanaged %s shader helper constant requested: %u.\n",
112 113 114 115 116 117 118
          shader == WINED3D_SHADER_TYPE_PIXEL ? "pixel" : "vertex", value);
    switch (value)
    {
        case ARB_ZERO: return "0.0";
        case ARB_ONE: return "1.0";
        case ARB_TWO: return "2.0";
        case ARB_0001: return "{0.0, 0.0, 0.0, 1.0}";
119
        case ARB_EPS: return "1e-8";
120 121 122 123
        default: return "bad";
    }
}

124
static inline BOOL ffp_clip_emul(const struct wined3d_context *context)
125
{
126
    return context->lowest_disabled_stage < 7;
127 128
}

129
/* ARB_program_shader private data */
130

131
struct control_frame
132
{
133
    struct                          list entry;
134 135 136 137 138 139 140
    enum
    {
        IF,
        IFC,
        LOOP,
        REP
    } type;
141 142
    BOOL                            muting;
    BOOL                            outer_loop;
143 144
    union
    {
145 146 147
        unsigned int                loop;
        unsigned int                ifc;
    } no;
148
    struct wined3d_shader_loop_control loop_control;
149
    BOOL                            had_else;
150 151
};

152 153 154
struct arb_ps_np2fixup_info
{
    struct ps_np2fixup_info         super;
155
    /* For ARB we need an offset value:
156 157 158 159 160 161
     * With both GLSL and ARB mode the NP2 fixup information (the texture dimensions) are stored in a
     * consecutive way (GLSL uses a uniform array). Since ARB doesn't know the notion of a "standalone"
     * array we need an offset to the index inside the program local parameter array. */
    UINT                            offset;
};

162 163 164
struct arb_ps_compile_args
{
    struct ps_compile_args          super;
165 166
    WORD                            bools;
    WORD                            clip;  /* only a boolean, use a WORD for alignment */
167
    unsigned char                   loop_ctrl[WINED3D_MAX_CONSTS_I][3];
168 169 170 171 172 173 174 175 176 177 178
};

/* Ties a texture unit to the program constant that receives per-stage
 * texture state. shader_arb_ps_local_constants() uses "texunit" to index
 * state->texture_states and "const_num" as the program local parameter
 * index to upload to. */
struct stb_const_desc
{
    unsigned char           texunit;
    UINT                    const_num;
};

struct arb_ps_compiled_shader
{
    struct arb_ps_compile_args      args;
179
    struct arb_ps_np2fixup_info     np2fixup_info;
180 181
    struct stb_const_desc           bumpenvmatconst[WINED3D_MAX_TEXTURES];
    struct stb_const_desc           luminanceconst[WINED3D_MAX_TEXTURES];
182
    UINT                            int_consts[WINED3D_MAX_CONSTS_I];
183
    GLuint                          prgId;
184
    UINT                            ycorrection;
185 186
    unsigned char                   numbumpenvmatconsts;
    char                            num_int_consts;
187 188 189 190 191
};

struct arb_vs_compile_args
{
    struct vs_compile_args          super;
192 193 194 195 196
    union
    {
        struct
        {
            WORD                    bools;
197 198
            unsigned char           clip_texcoord;
            unsigned char           clipplane_mask;
199 200
        }                           boolclip;
        DWORD                       boolclip_compare;
201
    } clip;
202
    DWORD                           ps_signature;
203 204
    union
    {
205 206 207
        unsigned char               samplers[4];
        DWORD                       samplers_compare;
    } vertex;
208
    unsigned char                   loop_ctrl[WINED3D_MAX_CONSTS_I][3];
209 210 211 212 213 214
};

struct arb_vs_compiled_shader
{
    struct arb_vs_compile_args      args;
    GLuint                          prgId;
215
    UINT                            int_consts[WINED3D_MAX_CONSTS_I];
216
    char                            num_int_consts;
217
    char                            need_color_unclamp;
218
    UINT                            pos_fixup;
219 220
};

221 222 223 224 225 226 227 228
/* A shader instruction stored together with list linkage.
 * NOTE(review): presumably these are the elements of the "record" list in
 * struct shader_arb_ctx_priv - confirm against the code that fills it. */
struct recorded_instruction
{
    struct wined3d_shader_instruction ins;
    struct list entry;
};

struct shader_arb_ctx_priv
{
229
    char addr_reg[50];
230 231
    enum
    {
232 233
        /* plain GL_ARB_vertex_program or GL_ARB_fragment_program */
        ARB,
234
        /* GL_NV_vertex_program2_option or GL_NV_fragment_program_option */
235 236 237 238
        NV2,
        /* GL_NV_vertex_program3 or GL_NV_fragment_program2 */
        NV3
    } target_version;
239

240
    const struct wined3d_gl_info *gl_info;
241 242
    const struct arb_vs_compile_args    *cur_vs_args;
    const struct arb_ps_compile_args    *cur_ps_args;
243
    const struct arb_ps_compiled_shader *compiled_fprog;
244
    const struct arb_vs_compiled_shader *compiled_vprog;
245
    struct arb_ps_np2fixup_info         *cur_np2fixup_info;
246 247 248 249
    struct list                         control_frames;
    struct list                         record;
    BOOL                                recording;
    BOOL                                muted;
250
    unsigned int                        num_loops, loop_depth, num_ifcs;
251
    int                                 aL;
252
    BOOL                                ps_post_process;
253

254
    unsigned int                        vs_clipplanes;
255 256
    BOOL                                footer_written;
    BOOL                                in_main_func;
257

258 259 260 261 262 263 264 265 266 267 268
    /* For 3.0 vertex shaders */
    const char                          *vs_output[MAX_REG_OUTPUT];
    /* For 2.x and earlier vertex shaders */
    const char                          *texcrd_output[8], *color_output[2], *fog_output;

    /* 3.0 pshader input for compatibility with fixed function */
    const char                          *ps_input[MAX_REG_INPUT];
};

struct ps_signature
{
269
    struct wined3d_shader_signature sig;
270 271
    DWORD                               idx;
    struct wine_rb_entry                entry;
272 273
};

274 275 276
struct arb_pshader_private {
    struct arb_ps_compiled_shader   *gl_shaders;
    UINT                            num_gl_shaders, shader_array_size;
277
    DWORD                           input_signature_idx;
278
    DWORD                           clipplane_emulation;
279
    BOOL                            clamp_consts;
280 281
};

282 283 284
/* ARB backend data attached to a vertex shader. */
struct arb_vshader_private
{
    /* Compiled variants of the shader. NOTE(review): num_gl_shaders and
     * shader_array_size look like the used count and the capacity of this
     * array - confirm against the code that grows it. */
    struct arb_vs_compiled_shader *gl_shaders;
    UINT num_gl_shaders;
    UINT shader_array_size;
    /* A non-zero value makes need_rel_addr_const() report TRUE. */
    UINT rel_offset;
};

288 289 290 291 292 293 294 295
struct shader_arb_priv
{
    GLuint                  current_vprogram_id;
    GLuint                  current_fprogram_id;
    const struct arb_ps_compiled_shader *compiled_fprog;
    const struct arb_vs_compiled_shader *compiled_vprog;
    BOOL                    use_arbfp_fixed_func;
    struct wine_rb_tree     fragment_shaders;
296
    BOOL                    last_ps_const_clamped;
297
    BOOL                    last_vs_color_unclamp;
298 299 300

    struct wine_rb_tree     signature_tree;
    DWORD ps_sig_number;
301 302

    unsigned int highest_dirty_ps_const, highest_dirty_vs_const;
303
    char vshader_const_dirty[WINED3D_MAX_VS_CONSTS_F];
304
    char pshader_const_dirty[WINED3D_MAX_PS_CONSTS_F];
305
    const struct wined3d_context *last_context;
306

307
    const struct wined3d_vertex_pipe_ops *vertex_pipe;
308
    const struct wined3d_fragment_pipe_ops *fragment_pipe;
309
    BOOL ffp_proj_control;
310 311
};

312 313
/* Context activation for state handlers is done by the caller. */

314
static BOOL need_rel_addr_const(const struct arb_vshader_private *shader_data,
315
        const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info)
316
{
317
    if (shader_data->rel_offset) return TRUE;
318
    if (!reg_maps->usesmova) return FALSE;
319 320 321 322 323 324 325 326 327 328
    return !gl_info->supported[NV_VERTEX_PROGRAM2_OPTION];
}

/* Returns TRUE if result.clip from GL_NV_vertex_program2 should be used and FALSE otherwise */
static inline BOOL use_nv_clip(const struct wined3d_gl_info *gl_info)
{
    return gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]
            && !(gl_info->quirks & WINED3D_QUIRK_NV_CLIP_BROKEN);
}

329
static BOOL need_helper_const(const struct arb_vshader_private *shader_data,
330
        const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info)
331
{
332
    if (need_rel_addr_const(shader_data, reg_maps, gl_info)) return TRUE;
333 334 335
    if (!gl_info->supported[NV_VERTEX_PROGRAM]) return TRUE; /* Need to init colors. */
    if (gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT) return TRUE; /* Load the immval offset. */
    if (!use_nv_clip(gl_info)) return TRUE; /* Init the clip texcoord */
336
    if (reg_maps->usesnrm) return TRUE; /* 0.0 */
337
    if (reg_maps->usespow) return TRUE; /* EPS, 0.0 and 1.0 */
338
    if (reg_maps->fog) return TRUE; /* Clamping fog coord, 0.0 and 1.0 */
339 340 341
    return FALSE;
}

342
/* Counts the constants the backend reserves for itself in a vertex
 * program: always one PARAM for the position fixup, one more when the
 * helper constant is needed and one more when the relative addressing
 * constant is needed. */
static unsigned int reserved_vs_const(const struct arb_vshader_private *shader_data,
        const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info)
{
    unsigned int reserved = 1;

    reserved += need_helper_const(shader_data, reg_maps, gl_info) ? 1 : 0;
    reserved += need_rel_addr_const(shader_data, reg_maps, gl_info) ? 1 : 0;

    return reserved;
}
352

353
/* Loads floating point constants into the currently set ARB_vertex/fragment_program.
354
 * When constant_list == NULL, it will load all the constants.
355
 *
356 357 358
 * @target_type should be either GL_VERTEX_PROGRAM_ARB (for vertex shaders)
 *  or GL_FRAGMENT_PROGRAM_ARB (for pixel shaders)
 */
359
/* Context activation is done by the caller. */
360
static unsigned int shader_arb_load_constants_f(const struct wined3d_shader *shader,
361
        const struct wined3d_gl_info *gl_info, GLuint target_type, unsigned int max_constants,
362
        const struct wined3d_vec4 *constants, char *dirty_consts)
363
{
364
    struct wined3d_shader_lconst *lconst;
365
    unsigned int ret, i, j;
366

367 368
    if (TRACE_ON(d3d_constants))
    {
369 370 371 372 373
        for (i = 0; i < max_constants; ++i)
        {
            if (!dirty_consts[i])
                continue;
            TRACE_(d3d_constants)("Loading constant %u: %s.\n", i, debug_vec4(&constants[i]));
374 375
        }
    }
376 377 378

    i = 0;

379
    /* In 1.X pixel shaders constants are implicitly clamped in the range [-1;1] */
380
    if (target_type == GL_FRAGMENT_PROGRAM_ARB && shader->reg_maps.shader_version.major == 1)
381
    {
382
        float lcl_const[4];
383 384 385
        /* ps 1.x supports only 8 constants, clamp only those. When switching between 1.x and higher
         * shaders, the first 8 constants are marked dirty for reload
         */
386 387 388 389
        for (; i < min(8, max_constants); ++i)
        {
            if (!dirty_consts[i])
                continue;
390 391
            dirty_consts[i] = 0;

392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418
            if (constants[i].x > 1.0f)
                lcl_const[0] = 1.0f;
            else if (constants[i].x < -1.0f)
                lcl_const[0] = -1.0f;
            else
                lcl_const[0] = constants[i].x;

            if (constants[i].y > 1.0f)
                lcl_const[1] = 1.0f;
            else if (constants[i].y < -1.0f)
                lcl_const[1] = -1.0f;
            else
                lcl_const[1] = constants[i].y;

            if (constants[i].z > 1.0f)
                lcl_const[2] = 1.0f;
            else if (constants[i].z < -1.0f)
                lcl_const[2] = -1.0f;
            else
                lcl_const[2] = constants[i].z;

            if (constants[i].w > 1.0f)
                lcl_const[3] = 1.0f;
            else if (constants[i].w < -1.0f)
                lcl_const[3] = -1.0f;
            else
                lcl_const[3] = constants[i].w;
419 420

            GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, lcl_const));
421
        }
422

423 424 425 426 427 428 429 430 431
        /* If further constants are dirty, reload them without clamping.
         *
         * The alternative is not to touch them, but then we cannot reset the dirty constant count
         * to zero. That's bad for apps that only use PS 1.x shaders, because in that case the code
         * above would always re-check the first 8 constants since max_constant remains at the init
         * value
         */
    }

432 433
    if (gl_info->supported[EXT_GPU_PROGRAM_PARAMETERS])
    {
434 435 436 437 438
        /* TODO: Benchmark if we're better of with finding the dirty constants ourselves,
         * or just reloading *all* constants at once
         *
        GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, i, max_constants, constants + (i * 4)));
         */
439 440 441 442
        for (; i < max_constants; ++i)
        {
            if (!dirty_consts[i])
                continue;
443 444 445 446

            /* Find the next block of dirty constants */
            dirty_consts[i] = 0;
            j = i;
447 448
            for (++i; (i < max_constants) && dirty_consts[i]; ++i)
            {
449
                dirty_consts[i] = 0;
450
            }
451

452
            GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, j, i - j, &constants[j].x));
453
        }
454 455 456 457 458 459 460
    }
    else
    {
        for (; i < max_constants; ++i)
        {
            if (dirty_consts[i])
            {
461
                dirty_consts[i] = 0;
462
                GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, &constants[i].x));
463
            }
464
        }
465 466
    }
    checkGLcall("glProgramEnvParameter4fvARB()");
467 468

    /* Load immediate constants */
469 470 471 472
    if (shader->load_local_constsF)
    {
        if (TRACE_ON(d3d_shader))
        {
473
            LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
474
            {
475 476 477 478 479 480 481
                GLfloat* values = (GLfloat*)lconst->value;
                TRACE_(d3d_constants)("Loading local constants %i: %f, %f, %f, %f\n", lconst->idx,
                        values[0], values[1], values[2], values[3]);
            }
        }
        /* Immediate constants are clamped for 1.X shaders at loading times */
        ret = 0;
482
        LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
483
        {
484
            dirty_consts[lconst->idx] = 1; /* Dirtify so the non-immediate constant overwrites it next time */
485
            ret = max(ret, lconst->idx + 1);
486
            GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, lconst->idx, (GLfloat*)lconst->value));
487
        }
488 489 490 491
        checkGLcall("glProgramEnvParameter4fvARB()");
        return ret; /* The loaded immediate constants need reloading for the next shader */
    } else {
        return 0; /* No constants are dirty now */
492
    }
493 494
}

495 496 497
/* Loads the texture dimensions for NP2 fixup into the currently set
 * ARB_[vertex/fragment]_programs. */
static void shader_arb_load_np2fixup_constants(const struct arb_ps_np2fixup_info *fixup,
498 499
        const struct wined3d_gl_info *gl_info, const struct wined3d_state *state)
{
500
    GLfloat np2fixup_constants[4 * WINED3D_MAX_FRAGMENT_SAMPLERS];
501 502
    WORD active = fixup->super.active;
    UINT i;
503

504 505
    if (!active)
        return;
506

507 508 509 510 511
    for (i = 0; active; active >>= 1, ++i)
    {
        const struct wined3d_texture *tex = state->textures[i];
        unsigned char idx = fixup->super.idx[i];
        GLfloat *tex_dim = &np2fixup_constants[(idx >> 1) * 4];
512

513 514
        if (!(active & 1))
            continue;
515

516 517 518 519
        if (!tex)
        {
            ERR("Nonexistent texture is flagged for NP2 texcoord fixup.\n");
            continue;
520 521
        }

522 523 524 525
        if (idx % 2)
        {
            tex_dim[2] = tex->pow2_matrix[0];
            tex_dim[3] = tex->pow2_matrix[5];
526
        }
527 528 529 530 531 532 533 534 535 536 537
        else
        {
            tex_dim[0] = tex->pow2_matrix[0];
            tex_dim[1] = tex->pow2_matrix[5];
        }
    }

    for (i = 0; i < fixup->super.num_consts; ++i)
    {
        GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
                fixup->offset + i, &np2fixup_constants[i * 4]));
538
    }
539 540
}

541
/* Context activation is done by the caller. */
542
static void shader_arb_ps_local_constants(const struct arb_ps_compiled_shader *gl_shader,
543
        const struct wined3d_context_gl *context_gl, const struct wined3d_state *state, unsigned int rt_height)
544
{
545
    const struct wined3d_gl_info *gl_info = context_gl->gl_info;
546 547
    unsigned char i;

548
    for(i = 0; i < gl_shader->numbumpenvmatconsts; i++)
549
    {
550 551
        int texunit = gl_shader->bumpenvmatconst[i].texunit;

552
        /* The state manager takes care that this function is always called if the bump env matrix changes */
553
        const float *data = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_MAT00];
554 555
        GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
                gl_shader->bumpenvmatconst[i].const_num, data));
556

557
        if (gl_shader->luminanceconst[i].const_num != WINED3D_CONST_NUM_UNUSED)
558
        {
559
            /* WINED3D_TSS_BUMPENVLSCALE and WINED3D_TSS_BUMPENVLOFFSET are next to each other.
560 561
             * point gl to the scale, and load 4 floats. x = scale, y = offset, z and w are junk, we
             * don't care about them. The pointers are valid for sure because the stateblock is bigger.
562
             * (they're WINED3D_TSS_TEXTURETRANSFORMFLAGS and WINED3D_TSS_ADDRESSW, so most likely 0 or NaN
563
            */
564
            const float *scale = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_LSCALE];
565 566
            GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
                    gl_shader->luminanceconst[i].const_num, scale));
567 568
        }
    }
569
    checkGLcall("Load bumpmap consts");
570

571 572 573 574 575 576 577 578
    if(gl_shader->ycorrection != WINED3D_CONST_NUM_UNUSED)
    {
        /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen).
        * ycorrection.y: -1.0(onscreen), 1.0(offscreen)
        * ycorrection.z: 1.0
        * ycorrection.w: 0.0
        */
        float val[4];
579 580
        val[0] = context_gl->c.render_offscreen ? 0.0f : (float)rt_height;
        val[1] = context_gl->c.render_offscreen ? 1.0f : -1.0f;
581 582
        val[2] = 1.0f;
        val[3] = 0.0f;
583
        GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->ycorrection, val));
584
        checkGLcall("y correction loading");
585 586
    }

587
    if (!gl_shader->num_int_consts) return;
588

589
    for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
590 591 592 593
    {
        if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED)
        {
            float val[4];
594 595 596
            val[0] = (float)state->ps_consts_i[i].x;
            val[1] = (float)state->ps_consts_i[i].y;
            val[2] = (float)state->ps_consts_i[i].z;
597
            val[3] = -1.0f;
598 599 600 601

            GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->int_consts[i], val));
        }
    }
602
    checkGLcall("Load ps int consts");
603 604
}

605
/* Context activation is done by the caller. */
606
static void shader_arb_vs_local_constants(const struct arb_vs_compiled_shader *gl_shader,
607
        const struct wined3d_context_gl *context_gl, const struct wined3d_state *state)
608
{
609
    const struct wined3d_gl_info *gl_info = context_gl->gl_info;
610
    float position_fixup[4];
611
    unsigned char i;
612

613
    /* Upload the position fixup */
614
    shader_get_position_fixup(&context_gl->c, state, 1, position_fixup);
615
    GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->pos_fixup, position_fixup));
616

617
    if (!gl_shader->num_int_consts) return;
618

619
    for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
620 621 622 623
    {
        if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED)
        {
            float val[4];
624 625 626
            val[0] = (float)state->vs_consts_i[i].x;
            val[1] = (float)state->vs_consts_i[i].y;
            val[2] = (float)state->vs_consts_i[i].z;
627
            val[3] = -1.0f;
628 629 630 631

            GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->int_consts[i], val));
        }
    }
632
    checkGLcall("Load vs int consts");
633
}
634

635
/* Forward declaration; shader_arb_load_constants_internal() calls this
 * before its definition. */
static void shader_arb_select(void *shader_priv,
        struct wined3d_context *context, const struct wined3d_state *state);
637

638 639
/**
 * Loads the app-supplied constants into the currently set ARB_[vertex/fragment]_programs.
640 641
 *
 * We only support float constants in ARB at the moment, so don't
642 643
 * worry about the Integers or Booleans
 */
644
/* Context activation is done by the caller (state handler). */
645 646
static void shader_arb_load_constants_internal(struct shader_arb_priv *priv, struct wined3d_context_gl *context_gl,
        const struct wined3d_state *state, BOOL use_ps, BOOL use_vs, BOOL from_shader_select)
647
{
648
    const struct wined3d_d3d_info *d3d_info = context_gl->c.d3d_info;
649
    const struct wined3d_gl_info *gl_info = context_gl->gl_info;
650

651 652
    if (!from_shader_select)
    {
653 654 655
        const struct wined3d_shader *vshader = state->shader[WINED3D_SHADER_TYPE_VERTEX];
        const struct wined3d_shader *pshader = state->shader[WINED3D_SHADER_TYPE_PIXEL];

656
        if (vshader
657
                && (vshader->reg_maps.boolean_constants
658
                || (!gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]
659
                && (vshader->reg_maps.integer_constants & ~vshader->reg_maps.local_int_consts))))
660 661
        {
            TRACE("bool/integer vertex shader constants potentially modified, forcing shader reselection.\n");
662
            shader_arb_select(priv, &context_gl->c, state);
663 664
        }
        else if (pshader
665
                && (pshader->reg_maps.boolean_constants
666
                || (!gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION]
667
                && (pshader->reg_maps.integer_constants & ~pshader->reg_maps.local_int_consts))))
668 669
        {
            TRACE("bool/integer pixel shader constants potentially modified, forcing shader reselection.\n");
670
            shader_arb_select(priv, &context_gl->c, state);
671 672 673
        }
    }

674
    if (&context_gl->c != priv->last_context)
675 676
    {
        memset(priv->vshader_const_dirty, 1,
677 678
                sizeof(*priv->vshader_const_dirty) * d3d_info->limits.vs_uniform_count);
        priv->highest_dirty_vs_const = d3d_info->limits.vs_uniform_count;
679 680

        memset(priv->pshader_const_dirty, 1,
681 682
                sizeof(*priv->pshader_const_dirty) * d3d_info->limits.ps_uniform_count);
        priv->highest_dirty_ps_const = d3d_info->limits.ps_uniform_count;
683

684
        priv->last_context = &context_gl->c;
685 686
    }

687
    if (use_vs)
688
    {
689
        const struct wined3d_shader *vshader = state->shader[WINED3D_SHADER_TYPE_VERTEX];
690
        const struct arb_vs_compiled_shader *gl_shader = priv->compiled_vprog;
691 692

        /* Load DirectX 9 float constants for vertex shader */
693
        priv->highest_dirty_vs_const = shader_arb_load_constants_f(vshader, gl_info, GL_VERTEX_PROGRAM_ARB,
694
                priv->highest_dirty_vs_const, state->vs_consts_f, priv->vshader_const_dirty);
695
        shader_arb_vs_local_constants(gl_shader, context_gl, state);
696 697
    }

698
    if (use_ps)
699
    {
700
        const struct wined3d_shader *pshader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
701
        const struct arb_ps_compiled_shader *gl_shader = priv->compiled_fprog;
702
        UINT rt_height = state->fb.render_targets[0]->height;
703

704
        /* Load DirectX 9 float constants for pixel shader */
705
        priv->highest_dirty_ps_const = shader_arb_load_constants_f(pshader, gl_info, GL_FRAGMENT_PROGRAM_ARB,
706
                priv->highest_dirty_ps_const, state->ps_consts_f, priv->pshader_const_dirty);
707
        shader_arb_ps_local_constants(gl_shader, context_gl, state, rt_height);
708

709
        if (context_gl->c.constant_update_mask & WINED3D_SHADER_CONST_PS_NP2_FIXUP)
710
            shader_arb_load_np2fixup_constants(&gl_shader->np2fixup_info, gl_info, state);
711 712 713
    }
}

714
static void shader_arb_load_constants(void *shader_priv, struct wined3d_context *context,
715
        const struct wined3d_state *state)
716
{
717 718
    shader_arb_load_constants_internal(shader_priv, wined3d_context_gl(context),
            state, use_ps(state), use_vs(state), FALSE);
719 720
}

721
/* Marks the vertex shader float constants [start, start + count) dirty.
 *
 * We don't want shader constant dirtification to be an O(contexts)
 * operation, so only the active context is dirtified, and only if it
 * belongs to this device. On a context switch the old context will be
 * fully dirtified. */
static void shader_arb_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count)
{
    struct wined3d_context_gl *context_gl = wined3d_context_gl_get_current();
    struct shader_arb_priv *priv = device->shader_priv;

    if (context_gl && context_gl->c.device == device)
    {
        memset(&priv->vshader_const_dirty[start], 1, sizeof(*priv->vshader_const_dirty) * count);

        if (priv->highest_dirty_vs_const < start + count)
            priv->highest_dirty_vs_const = start + count;
    }
}

735
/* Marks the pixel shader float constants [start, start + count) dirty.
 *
 * We don't want shader constant dirtification to be an O(contexts)
 * operation, so only the active context is dirtified, and only if it
 * belongs to this device. On a context switch the old context will be
 * fully dirtified. */
static void shader_arb_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count)
{
    struct wined3d_context_gl *context_gl = wined3d_context_gl_get_current();
    struct shader_arb_priv *priv = device->shader_priv;

    if (context_gl && context_gl->c.device == device)
    {
        memset(&priv->pshader_const_dirty[start], 1, sizeof(*priv->pshader_const_dirty) * count);

        if (priv->highest_dirty_ps_const < start + count)
            priv->highest_dirty_ps_const = start + count;
    }
}

749
/* Appends the four floats in "values" to the buffer as an immediate vector
 * of the form "{x, y, z, w}". Each component is formatted with
 * wined3d_ftoa() into a 17 byte scratch string first. */
static void shader_arb_append_imm_vec4(struct wined3d_string_buffer *buffer, const float *values)
{
    char component[4][17];
    unsigned int c;

    for (c = 0; c < 4; ++c)
        wined3d_ftoa(values[c], component[c]);

    shader_addline(buffer, "{%s, %s, %s, %s}",
            component[0], component[1], component[2], component[3]);
}

760
/* Generate the variable & register declarations for the ARB_vertex_program output target */
761
static void shader_generate_arb_declarations(const struct wined3d_shader *shader,
762
        const struct wined3d_shader_reg_maps *reg_maps, struct wined3d_string_buffer *buffer,
763 764
        const struct wined3d_gl_info *gl_info, DWORD *num_clipplanes,
        const struct shader_arb_ctx_priv *ctx)
765
{
766
    DWORD i;
767
    char pshader = shader_is_pshader_version(reg_maps->shader_version.type);
768
    const struct wined3d_shader_lconst *lconst;
769
    unsigned max_constantsF;
770
    DWORD map;
771

772 773 774 775 776 777 778
    /* In pixel shaders, all private constants are program local, we don't need anything
     * from program.env. Thus we can advertise the full set of constants in pixel shaders.
     * If we need a private constant the GL implementation will squeeze it in somewhere
     *
     * With vertex shaders we need the posFixup and on some GL implementations 4 helper
     * immediate values. The posFixup is loaded using program.env for now, so always
     * subtract one from the number of constants. If the shader uses indirect addressing,
779
     * account for the helper const too because we have to declare all available d3d constants
780 781
     * and don't know which are actually used.
     */
782 783
    if (pshader)
    {
784
        max_constantsF = gl_info->limits.arb_ps_native_constants;
785 786 787
        /* 24 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value. */
        if (max_constantsF < 24)
            max_constantsF = gl_info->limits.arb_ps_float_constants;
788 789 790
    }
    else
    {
791
        const struct arb_vshader_private *shader_data = shader->backend_data;
792 793 794 795 796 797 798
        max_constantsF = gl_info->limits.arb_vs_native_constants;
        /* 96 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value.
         * Also prevents max_constantsF from becoming less than 0 and
         * wrapping . */
        if (max_constantsF < 96)
            max_constantsF = gl_info->limits.arb_vs_float_constants;

799 800
        if (reg_maps->usesrelconstF)
        {
801
            DWORD highest_constf = 0, clip_limit;
802

803
            max_constantsF -= reserved_vs_const(shader_data, reg_maps, gl_info);
804
            max_constantsF -= wined3d_popcount(reg_maps->integer_constants);
805
            max_constantsF -= gl_info->reserved_arb_constants;
806

807
            for (i = 0; i < shader->limits->constant_float; ++i)
808
            {
809
                if (wined3d_bitmap_is_set(reg_maps->constf, i))
810
                    highest_constf = i;
811
            }
812

813 814
            if(use_nv_clip(gl_info) && ctx->target_version >= NV2)
            {
815
                if(ctx->cur_vs_args->super.clip_enabled)
816
                    clip_limit = gl_info->limits.user_clip_distances;
817 818
                else
                    clip_limit = 0;
819 820 821
            }
            else
            {
822
                unsigned int mask = ctx->cur_vs_args->clip.boolclip.clipplane_mask;
823
                clip_limit = min(wined3d_popcount(mask), 4);
824
            }
825 826 827 828
            *num_clipplanes = min(clip_limit, max_constantsF - highest_constf - 1);
            max_constantsF -= *num_clipplanes;
            if(*num_clipplanes < clip_limit)
            {
829 830
                WARN("Only %u clip planes out of %u enabled.\n", *num_clipplanes,
                        gl_info->limits.user_clip_distances);
831
            }
832 833 834
        }
        else
        {
835 836 837 838
            if (ctx->target_version >= NV2)
                *num_clipplanes = gl_info->limits.user_clip_distances;
            else
                *num_clipplanes = min(gl_info->limits.user_clip_distances, 4);
839 840
        }
    }
841

842 843 844
    for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
    {
        if (map & 1) shader_addline(buffer, "TEMP R%u;\n", i);
845 846
    }

847 848 849
    for (i = 0, map = reg_maps->address; map; map >>= 1, ++i)
    {
        if (map & 1) shader_addline(buffer, "ADDRESS A%u;\n", i);
850 851
    }

852 853 854 855 856
    if (pshader && reg_maps->shader_version.major == 1 && reg_maps->shader_version.minor <= 3)
    {
        for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i)
        {
            if (map & 1) shader_addline(buffer, "TEMP T%u;\n", i);
857
        }
858 859
    }

860
    if (!shader->load_local_constsF)
861
    {
862
        LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
863
        {
864 865
            const float *value;
            value = (const float *)lconst->value;
866 867 868
            shader_addline(buffer, "PARAM C%u = ", lconst->idx);
            shader_arb_append_imm_vec4(buffer, value);
            shader_addline(buffer, ";\n");
869 870 871
        }
    }

872 873 874
    /* After subtracting privately used constants from the hardware limit(they are loaded as
     * local constants), make sure the shader doesn't violate the env constant limit
     */
875
    if (pshader)
876
    {
877
        max_constantsF = min(max_constantsF, gl_info->limits.arb_ps_float_constants);
878 879 880
    }
    else
    {
881
        max_constantsF = min(max_constantsF, gl_info->limits.arb_vs_float_constants);
882 883 884
    }

    /* Avoid declaring more constants than needed */
885
    max_constantsF = min(max_constantsF, shader->limits->constant_float);
886

887 888 889 890 891 892
    /* we use the array-based constants array if the local constants are marked for loading,
     * because then we use indirect addressing, or when the local constant list is empty,
     * because then we don't know if we're using indirect addressing or not. If we're hardcoding
     * local constants do not declare the loaded constants as an array because ARB compilers usually
     * do not optimize unused constants away
     */
893 894
    if (reg_maps->usesrelconstF)
    {
895 896 897
        /* Need to PARAM the environment parameters (constants) so we can use relative addressing */
        shader_addline(buffer, "PARAM C[%d] = { program.env[0..%d] };\n",
                    max_constantsF, max_constantsF - 1);
898 899 900 901 902
    }
    else
    {
        for (i = 0; i < max_constantsF; ++i)
        {
903
            if (!shader_constant_is_local(shader, i) && wined3d_extract_bits(reg_maps->constf, i, 1))
904
            {
905 906 907 908
                shader_addline(buffer, "PARAM C%d = program.env[%d];\n",i, i);
            }
        }
    }
909 910
}

911
/* Names of the helper-constant components used to scale a result, one entry
 * per table index. The per-entry notes give the scale factor for that index
 * ("xN" multiplies, "dN" divides). Indices with no usable constant map to
 * "dummy".
 * NOTE(review): presumably indexed by the D3D destination shift value -
 * confirm against the users of this table. */
static const char * const shift_tab[] =
{
    /*  0: none */ "dummy",
    /*  1: x2   */ "coefmul.x",
    /*  2: x4   */ "coefmul.y",
    /*  3: x8   */ "coefmul.z",
    /*  4: x16  */ "coefmul.w",
    /*  5: x32  */ "dummy",
    /*  6: x64  */ "dummy",
    /*  7: x128 */ "dummy",
    /*  8: d256 */ "dummy",
    /*  9: d128 */ "dummy",
    /* 10: d64  */ "dummy",
    /* 11: d32  */ "dummy",
    /* 12: d16  */ "coefdiv.w",
    /* 13: d8   */ "coefdiv.z",
    /* 14: d4   */ "coefdiv.y",
    /* 15: d2   */ "coefdiv.x",
};

930
static void shader_arb_get_write_mask(const struct wined3d_shader_instruction *ins,
931
        const struct wined3d_shader_dst_param *dst, char *write_mask)
932
{
933
    char *ptr = write_mask;
934

935
    if (dst->write_mask != WINED3DSP_WRITEMASK_ALL)
936
    {
937
        *ptr++ = '.';
938 939 940 941
        if (dst->write_mask & WINED3DSP_WRITEMASK_0) *ptr++ = 'x';
        if (dst->write_mask & WINED3DSP_WRITEMASK_1) *ptr++ = 'y';
        if (dst->write_mask & WINED3DSP_WRITEMASK_2) *ptr++ = 'z';
        if (dst->write_mask & WINED3DSP_WRITEMASK_3) *ptr++ = 'w';
942 943
    }

944
    *ptr = '\0';
945 946
}

947 948
/* Format the source swizzle of "param" as an ARB swizzle suffix.
 *
 * swizzle_str receives an empty string for the default swizzle without
 * fixup, a single-component suffix (e.g. ".x") when all four selectors are
 * equal, and a full four-component suffix otherwise.
 *
 * For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra"
 * but addressed as "rgba"; "fixup" requests swapping the x and z
 * components to compensate. */
static void shader_arb_get_swizzle(const struct wined3d_shader_src_param *param, BOOL fixup, char *swizzle_str)
{
    const char *names = fixup ? "zyxw" : "xyzw";
    DWORD packed = param->swizzle;
    unsigned int i, len = 0;
    DWORD sel[4];

    /* Two selector bits per component, packed as wwzzyyxx. */
    for (i = 0; i < 4; ++i)
        sel[i] = (packed >> (2 * i)) & 0x03;

    /* The default swizzle needs no suffix, unless we have to apply our own
     * swizzling. */
    if (packed == WINED3DSP_NOSWIZZLE && !fixup)
    {
        swizzle_str[0] = '\0';
        return;
    }

    swizzle_str[len++] = '.';
    if (sel[0] == sel[1] && sel[0] == sel[2] && sel[0] == sel[3])
    {
        /* Replicated scalar: one component name is enough. */
        swizzle_str[len++] = names[sel[0]];
    }
    else
    {
        for (i = 0; i < 4; ++i)
            swizzle_str[len++] = names[sel[i]];
    }
    swizzle_str[len] = '\0';
}

980 981 982
static void shader_arb_request_a0(const struct wined3d_shader_instruction *ins, const char *src)
{
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
983
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
984

985 986
    if (!strcmp(priv->addr_reg, src))
        return;
987

988
    snprintf(priv->addr_reg, sizeof(priv->addr_reg), "%s", src);
989 990 991
    shader_addline(buffer, "ARL A0.x, %s;\n", src);
}

992 993 994 995
/* Forward declaration: shader_arb_get_register_name() below calls this to
 * resolve relative-address registers, while the definition further down
 * itself calls shader_arb_get_register_name(). */
static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins,
        const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr);

/* Translate a D3D shader register into the name used in the generated ARB
 * program.
 *
 * ins           - instruction being translated; supplies the shader, its
 *                 register maps and the backend context.
 * reg           - register to name.
 * register_name - receives the NUL-terminated name. Always written, also for
 *                 unknown or out-of-range registers, which get a placeholder
 *                 name so the caller never reads an uninitialized buffer.
 * is_color      - set to TRUE for vertex shader inputs flagged in the
 *                 current swizzle map, FALSE otherwise.
 *
 * Resolving a relative address may emit instructions into the shader buffer
 * (through shader_arb_get_src_param() and shader_arb_request_a0()). */
static void shader_arb_get_register_name(const struct wined3d_shader_instruction *ins,
        const struct wined3d_shader_register *reg, char *register_name, BOOL *is_color)
{
    /* oPos, oFog and oPts in D3D */
    static const char * const rastout_reg_names[] = {"TMP_OUT", "TMP_FOGCOORD", "result.pointsize"};
    const struct wined3d_shader *shader = ins->ctx->shader;
    const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
    BOOL pshader = shader_is_pshader_version(reg_maps->shader_version.type);
    struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;

    *is_color = FALSE;

    switch (reg->type)
    {
        case WINED3DSPR_TEMP:
            sprintf(register_name, "R%u", reg->idx[0].offset);
            break;

        case WINED3DSPR_INPUT:
            if (pshader)
            {
                if (reg_maps->shader_version.major < 3)
                {
                    if (!reg->idx[0].offset)
                        strcpy(register_name, "fragment.color.primary");
                    else
                        strcpy(register_name, "fragment.color.secondary");
                }
                else
                {
                    if (reg->idx[0].rel_addr)
                    {
                        char rel_reg[50];
                        shader_arb_get_src_param(ins, reg->idx[0].rel_addr, 0, rel_reg);

                        if (!strcmp(rel_reg, "**aL_emul**"))
                        {
                            /* Emulated loop counter: the index is known at
                             * compile time, resolve it directly. */
                            DWORD idx = ctx->aL + reg->idx[0].offset;
                            if (idx < MAX_REG_INPUT)
                            {
                                strcpy(register_name, ctx->ps_input[idx]);
                            }
                            else
                            {
                                ERR("Pixel shader input register out of bounds: %u\n", idx);
                                sprintf(register_name, "out_of_bounds_%u", idx);
                            }
                        }
                        else if (reg_maps->input_registers & 0x0300)
                        {
                            /* There are two ways basically:
                             *
                             * 1) Use the unrolling code that is used for loop emulation and unroll the loop.
                             *    That means trouble if the loop also contains a breakc or if the control values
                             *    aren't local constants.
                             * 2) Generate an if block that checks if aL.y < 8, == 8 or == 9 and selects the
                             *    source dynamically. The trouble is that we cannot simply read aL.y because it
                             *    is an ADDRESS register. We could however push it, load .zw with a value and use
                             *    ADAC to load the condition code register and pop it again afterwards
                             */
                            FIXME("Relative input register addressing with more than 8 registers\n");

                            /* This is better than nothing for now */
                            sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset);
                        }
                        else if (ctx->cur_ps_args->super.vp_mode != WINED3D_VP_MODE_SHADER)
                        {
                            /* This is problematic because we'd have to consult the ctx->ps_input strings
                             * for where to find the varying. Some may be "0.0", others can be texcoords or
                             * colors. This needs either a pipeline replacement to make the vertex shader feed
                             * proper varyings, or loop unrolling
                             *
                             * For now use the texcoords and hope for the best
                             */
                            FIXME("Non-vertex shader varying input with indirect addressing\n");
                            sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset);
                        }
                        else
                        {
                            /* D3D supports indirect addressing only with aL in loop registers. The loop instruction
                             * pulls GL_NV_fragment_program2 in
                             */
                            sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset);
                        }
                    }
                    else
                    {
                        if (reg->idx[0].offset < MAX_REG_INPUT)
                        {
                            strcpy(register_name, ctx->ps_input[reg->idx[0].offset]);
                        }
                        else
                        {
                            ERR("Pixel shader input register out of bounds: %u\n", reg->idx[0].offset);
                            sprintf(register_name, "out_of_bounds_%u", reg->idx[0].offset);
                        }
                    }
                }
            }
            else
            {
                if (ctx->cur_vs_args->super.swizzle_map & (1u << reg->idx[0].offset))
                    *is_color = TRUE;
                sprintf(register_name, "vertex.attrib[%u]", reg->idx[0].offset);
            }
            break;

        case WINED3DSPR_CONST:
            if (!pshader && reg->idx[0].rel_addr)
            {
                const struct arb_vshader_private *shader_data = shader->backend_data;
                UINT rel_offset = ctx->target_version == ARB ? shader_data->rel_offset : 0;
                BOOL aL = FALSE;
                char rel_reg[50];

                if (reg_maps->shader_version.major < 2)
                {
                    sprintf(rel_reg, "A0.x");
                }
                else
                {
                    shader_arb_get_src_param(ins, reg->idx[0].rel_addr, 0, rel_reg);
                    if (ctx->target_version == ARB)
                    {
                        if (!strcmp(rel_reg, "**aL_emul**"))
                        {
                            aL = TRUE;
                        }
                        else
                        {
                            /* Plain ARB can only index through A0.x; load
                             * the address source into it first. */
                            shader_arb_request_a0(ins, rel_reg);
                            sprintf(rel_reg, "A0.x");
                        }
                    }
                }

                if (aL)
                    sprintf(register_name, "C[%u]", ctx->aL + reg->idx[0].offset);
                else if (reg->idx[0].offset >= rel_offset)
                    sprintf(register_name, "C[%s + %u]", rel_reg, reg->idx[0].offset - rel_offset);
                else
                    sprintf(register_name, "C[%s - %u]", rel_reg, rel_offset - reg->idx[0].offset);
            }
            else
            {
                if (reg_maps->usesrelconstF)
                    sprintf(register_name, "C[%u]", reg->idx[0].offset);
                else
                    sprintf(register_name, "C%u", reg->idx[0].offset);
            }
            break;

        case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
            if (pshader)
            {
                if (reg_maps->shader_version.major == 1
                        && reg_maps->shader_version.minor <= 3)
                    /* In ps <= 1.3, Tx is a temporary register as destination
                     * to all instructions, and as source to most instructions.
                     * For some instructions it is the texcoord input. Those
                     * instructions know about the special use. */
                    sprintf(register_name, "T%u", reg->idx[0].offset);
                else
                    /* In ps 1.4 and 2.x Tx is always a (read-only) varying. */
                    sprintf(register_name, "fragment.texcoord[%u]", reg->idx[0].offset);
            }
            else
            {
                if (reg_maps->shader_version.major == 1 || ctx->target_version >= NV2)
                    sprintf(register_name, "A%u", reg->idx[0].offset);
                else
                    sprintf(register_name, "A%u_SHADOW", reg->idx[0].offset);
            }
            break;

        case WINED3DSPR_COLOROUT:
            if (ctx->ps_post_process && !reg->idx[0].offset)
            {
                strcpy(register_name, "TMP_COLOR");
            }
            else
            {
                if (ctx->cur_ps_args->super.srgb_correction)
                    FIXME("sRGB correction on higher render targets.\n");
                if (reg_maps->rt_mask > 1)
                    sprintf(register_name, "result.color[%u]", reg->idx[0].offset);
                else
                    strcpy(register_name, "result.color");
            }
            break;

        case WINED3DSPR_RASTOUT:
            if (reg->idx[0].offset == 1)
            {
                sprintf(register_name, "%s", ctx->fog_output);
            }
            else if (reg->idx[0].offset < sizeof(rastout_reg_names) / sizeof(*rastout_reg_names))
            {
                sprintf(register_name, "%s", rastout_reg_names[reg->idx[0].offset]);
            }
            else
            {
                /* Don't read past the end of rastout_reg_names[]. */
                ERR("Rasterizer output register out of bounds: %u\n", reg->idx[0].offset);
                sprintf(register_name, "out_of_bounds_%u", reg->idx[0].offset);
            }
            break;

        case WINED3DSPR_DEPTHOUT:
            strcpy(register_name, "result.depth");
            break;

        case WINED3DSPR_ATTROUT:
        /* case WINED3DSPR_OUTPUT: */
            if (pshader)
                sprintf(register_name, "oD[%u]", reg->idx[0].offset);
            else
                strcpy(register_name, ctx->color_output[reg->idx[0].offset]);
            break;

        case WINED3DSPR_TEXCRDOUT:
            if (pshader)
                sprintf(register_name, "oT[%u]", reg->idx[0].offset);
            else if (reg_maps->shader_version.major < 3)
                strcpy(register_name, ctx->texcrd_output[reg->idx[0].offset]);
            else
                strcpy(register_name, ctx->vs_output[reg->idx[0].offset]);
            break;

        case WINED3DSPR_LOOP:
            if (ctx->target_version >= NV2)
            {
                /* Pshader has an implicitly declared loop index counter A0.x that cannot be renamed */
                if (pshader)
                    sprintf(register_name, "A0.x");
                else
                    sprintf(register_name, "aL.y");
            }
            else
            {
                /* Unfortunately this code cannot return the value of ctx->aL here. An immediate value
                 * would be valid, but if aL is used for indexing(its only use), there's likely an offset,
                 * thus the result would be something like C[15 + 30], which is not valid in the ARB program
                 * grammar. So return a marker for the emulated aL and intercept it in constant and varying
                 * indexing
                 */
                sprintf(register_name, "**aL_emul**");
            }
            break;

        case WINED3DSPR_CONSTINT:
            sprintf(register_name, "I%u", reg->idx[0].offset);
            break;

        case WINED3DSPR_MISCTYPE:
            if (!reg->idx[0].offset)
            {
                sprintf(register_name, "vpos");
            }
            else if (reg->idx[0].offset == 1)
            {
                sprintf(register_name, "fragment.facing.x");
            }
            else
            {
                FIXME("Unknown MISCTYPE register index %u.\n", reg->idx[0].offset);
                /* Still hand back a defined string; callers copy the name
                 * unconditionally. */
                sprintf(register_name, "unrecognized_register[%u]", reg->idx[0].offset);
            }
            break;

        default:
            FIXME("Unhandled register type %#x[%u].\n", reg->type, reg->idx[0].offset);
            sprintf(register_name, "unrecognized_register[%u]", reg->idx[0].offset);
            break;
    }
}

1250
static void shader_arb_get_dst_param(const struct wined3d_shader_instruction *ins,
1251 1252 1253 1254 1255 1256
        const struct wined3d_shader_dst_param *wined3d_dst, char *str)
{
    char register_name[255];
    char write_mask[6];
    BOOL is_color;

1257
    shader_arb_get_register_name(ins, &wined3d_dst->reg, register_name, &is_color);
1258
    strcpy(str, register_name);
1259

1260
    shader_arb_get_write_mask(ins, wined3d_dst, write_mask);
1261
    strcat(str, write_mask);
1262 1263
}

1264
static const char *shader_arb_get_fixup_swizzle(enum fixup_channel_source channel_source)
1265
{
1266 1267 1268 1269 1270 1271 1272 1273
    switch(channel_source)
    {
        case CHANNEL_SOURCE_ZERO: return "0";
        case CHANNEL_SOURCE_ONE: return "1";
        case CHANNEL_SOURCE_X: return "x";
        case CHANNEL_SOURCE_Y: return "y";
        case CHANNEL_SOURCE_Z: return "z";
        case CHANNEL_SOURCE_W: return "w";
1274
        default:
1275 1276
            FIXME("Unhandled channel source %#x\n", channel_source);
            return "undefined";
1277
    }
1278 1279
}

1280
/* Which destination components need colour fixup work, as computed by
 * calc_color_correction(). Both fields are WINED3DSP_WRITEMASK_* bit sets
 * already restricted to the destination write mask. */
struct color_fixup_masks
1281
{
1282 1283 1284 1285 1286 1287 1288
    DWORD source; /* components whose channel source is not the identity mapping */
    DWORD sign;   /* components that have a sign fixup requested */
};

/* Work out which components of a sampled value need correction.
 *
 * fixup    - colour fixup description of the sampled texture.
 * dst_mask - write mask of the destination; components outside it are
 *            never reported.
 *
 * Returns the components that need re-swizzling (source) and the components
 * that need a sign fixup (sign). Complex fixups are not supported here: they
 * are reported and yield empty masks. */
static struct color_fixup_masks calc_color_correction(struct color_fixup_desc fixup, DWORD dst_mask)
{
    struct color_fixup_masks result = {0, 0};
    DWORD remapped = 0, biased = 0;

    if (is_complex_fixup(fixup))
    {
        enum complex_fixup complex_fixup = get_complex_fixup(fixup);
        FIXME("Complex fixup (%#x) not supported\n", complex_fixup);
        return result;
    }

    /* A component needs a swizzle when it is not sourced from itself. */
    remapped |= fixup.x_source != CHANNEL_SOURCE_X ? WINED3DSP_WRITEMASK_0 : 0;
    remapped |= fixup.y_source != CHANNEL_SOURCE_Y ? WINED3DSP_WRITEMASK_1 : 0;
    remapped |= fixup.z_source != CHANNEL_SOURCE_Z ? WINED3DSP_WRITEMASK_2 : 0;
    remapped |= fixup.w_source != CHANNEL_SOURCE_W ? WINED3DSP_WRITEMASK_3 : 0;

    biased |= fixup.x_sign_fixup ? WINED3DSP_WRITEMASK_0 : 0;
    biased |= fixup.y_sign_fixup ? WINED3DSP_WRITEMASK_1 : 0;
    biased |= fixup.z_sign_fixup ? WINED3DSP_WRITEMASK_2 : 0;
    biased |= fixup.w_sign_fixup ? WINED3DSP_WRITEMASK_3 : 0;

    result.source = remapped & dst_mask;
    result.sign = biased & dst_mask;

    return result;
}

static void gen_color_correction(struct wined3d_string_buffer *buffer, const char *dst,
        const char *src, const char *one, const char *two,
        struct color_fixup_desc fixup, struct color_fixup_masks masks)
{
    const char *sign_fixup_src = dst;
1325

1326
    if (masks.source)
1327
    {
1328 1329 1330 1331
        if (masks.sign)
            sign_fixup_src = "TA";

        shader_addline(buffer, "SWZ %s, %s, %s, %s, %s, %s;\n", sign_fixup_src, src,
1332 1333 1334
                shader_arb_get_fixup_swizzle(fixup.x_source), shader_arb_get_fixup_swizzle(fixup.y_source),
                shader_arb_get_fixup_swizzle(fixup.z_source), shader_arb_get_fixup_swizzle(fixup.w_source));
    }
1335 1336 1337 1338
    else if (masks.sign)
    {
        sign_fixup_src = src;
    }
1339

1340
    if (masks.sign)
1341 1342 1343
    {
        char reg_mask[6];
        char *ptr = reg_mask;
1344

1345
        if (masks.sign != WINED3DSP_WRITEMASK_ALL)
1346 1347
        {
            *ptr++ = '.';
1348 1349 1350 1351 1352 1353 1354 1355
            if (masks.sign & WINED3DSP_WRITEMASK_0)
                *ptr++ = 'x';
            if (masks.sign & WINED3DSP_WRITEMASK_1)
                *ptr++ = 'y';
            if (masks.sign & WINED3DSP_WRITEMASK_2)
                *ptr++ = 'z';
            if (masks.sign & WINED3DSP_WRITEMASK_3)
                *ptr++ = 'w';
1356
        }
1357 1358
        *ptr = '\0';

1359
        shader_addline(buffer, "MAD %s%s, %s, %s, -%s;\n", dst, reg_mask, sign_fixup_src, two, one);
1360
    }
1361
}
1362

1363 1364 1365
static const char *shader_arb_get_modifier(const struct wined3d_shader_instruction *ins)
{
    DWORD mod;
1366
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
1367 1368 1369
    if (!ins->dst_count) return "";

    mod = ins->dst[0].modifiers;
1370

1371
    /* Silently ignore PARTIALPRECISION if it's not supported */
1372 1373
    if(priv->target_version == ARB) mod &= ~WINED3DSPDM_PARTIALPRECISION;

1374 1375 1376 1377 1378
    if(mod & WINED3DSPDM_MSAMPCENTROID)
    {
        FIXME("Unhandled modifier WINED3DSPDM_MSAMPCENTROID\n");
        mod &= ~WINED3DSPDM_MSAMPCENTROID;
    }
1379 1380

    switch(mod)
1381
    {
1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396
        case WINED3DSPDM_SATURATE | WINED3DSPDM_PARTIALPRECISION:
            return "H_SAT";

        case WINED3DSPDM_SATURATE:
            return "_SAT";

        case WINED3DSPDM_PARTIALPRECISION:
            return "H";

        case 0:
            return "";

        default:
            FIXME("Unknown modifiers 0x%08x\n", mod);
            return "";
1397 1398 1399
    }
}

1400 1401 1402 1403 1404
/* Bit flags for the "flags" argument of shader_hw_sample(). They select the
 * sampling instruction it emits; shader_hw_sample() checks them in the
 * order DERIV, LOD, BIAS, PROJ and reports combinations it cannot honour. */
#define TEX_PROJ        0x1     /* projected sampling (TXP) */
#define TEX_BIAS        0x2     /* biased sampling (TXB) */
#define TEX_LOD         0x4     /* explicit level of detail (TXL) */
#define TEX_DERIV       0x10    /* explicit derivatives (TXD) */

1405
/* Emit a texture sampling instruction, plus any fixup code it needs.
 *
 * ins         - instruction being translated.
 * sampler_idx - D3D sampler index; for vertex shaders this is the vertex
 *               sampler index, which is remapped through the current
 *               vertex shader compile arguments.
 * dst_str     - destination operand.
 * coord_reg   - texture coordinate operand.
 * flags       - combination of TEX_PROJ, TEX_BIAS, TEX_LOD and TEX_DERIV.
 * dsx, dsy    - derivative operands, only used with TEX_DERIV.
 *
 * For pixel shaders the sample is post-processed by gen_color_correction();
 * when a correction is required the raw sample is taken into TA first. */
static void shader_hw_sample(const struct wined3d_shader_instruction *ins, DWORD sampler_idx,
        const char *dst_str, const char *coord_reg, WORD flags, const char *dsx, const char *dsy)
{
    const BOOL is_ps = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type);
    struct wined3d_string_buffer *out = ins->ctx->buffer;
    struct shader_arb_ctx_priv *ctx_priv = ins->ctx->backend_data;
    enum wined3d_shader_resource_type type;
    struct color_fixup_masks fixup_masks;
    const char *sample_dst = dst_str;
    const char *target, *sat;
    BOOL scale_np2 = FALSE;

    if (is_ps)
    {
        type = pixelshader_get_resource_type(ins->ctx->reg_maps, sampler_idx,
                ctx_priv->cur_ps_args->super.tex_types);
    }
    else
    {
        type = ins->ctx->reg_maps->resource_info[sampler_idx].type;

        /* D3D vertex shader sampler IDs are vertex samplers(0-3), not global d3d samplers */
        sampler_idx += WINED3D_MAX_FRAGMENT_SAMPLERS;
    }

    switch (type)
    {
        case WINED3D_SHADER_RESOURCE_TEXTURE_1D:
            target = "1D";
            break;

        case WINED3D_SHADER_RESOURCE_TEXTURE_2D:
            target = "2D";
            if (is_ps)
            {
                if ((ctx_priv->cur_ps_args->super.np2_fixup & (1u << sampler_idx))
                        && ctx_priv->gl_info->supported[ARB_TEXTURE_RECTANGLE])
                    target = "RECT";

                if (ctx_priv->cur_np2fixup_info->super.active & (1u << sampler_idx))
                {
                    if (flags)
                        FIXME("Only ordinary sampling from NP2 textures is supported.\n");
                    else
                        scale_np2 = TRUE;
                }
            }
            break;

        case WINED3D_SHADER_RESOURCE_TEXTURE_3D:
            target = "3D";
            break;

        case WINED3D_SHADER_RESOURCE_TEXTURE_CUBE:
            target = "CUBE";
            break;

        default:
            ERR("Unexpected resource type %#x.\n", type);
            target = "";
            break;
    }

    /* TEX, TXL, TXD and TXP do not support the "H" modifier,
     * so don't use shader_arb_get_modifier
     */
    sat = (ins->dst[0].modifiers & WINED3DSPDM_SATURATE) ? "_SAT" : "";

    /* Fragment samplers always have identity mapping */
    if (sampler_idx >= WINED3D_MAX_FRAGMENT_SAMPLERS)
        sampler_idx = ctx_priv->cur_vs_args->vertex.samplers[sampler_idx - WINED3D_MAX_FRAGMENT_SAMPLERS];

    if (is_ps)
    {
        fixup_masks = calc_color_correction(ctx_priv->cur_ps_args->super.color_fixup[sampler_idx],
                ins->dst[0].write_mask);

        /* Sample into the temporary when the result still needs fixing up. */
        if (fixup_masks.source || fixup_masks.sign)
            sample_dst = "TA";
    }

    if (flags & TEX_DERIV)
    {
        if (flags & TEX_PROJ)
            FIXME("Projected texture sampling with custom derivatives\n");
        if (flags & TEX_BIAS)
            FIXME("Biased texture sampling with custom derivatives\n");
        shader_addline(out, "TXD%s %s, %s, %s, %s, texture[%u], %s;\n", sat, sample_dst, coord_reg,
                dsx, dsy, sampler_idx, target);
    }
    else if (flags & TEX_LOD)
    {
        if (flags & TEX_PROJ)
            FIXME("Projected texture sampling with explicit lod\n");
        if (flags & TEX_BIAS)
            FIXME("Biased texture sampling with explicit lod\n");
        shader_addline(out, "TXL%s %s, %s, texture[%u], %s;\n", sat, sample_dst, coord_reg,
                sampler_idx, target);
    }
    else if (flags & TEX_BIAS)
    {
        /* Shouldn't be possible, but let's check for it */
        if (flags & TEX_PROJ)
            FIXME("Biased and Projected texture sampling\n");
        /* TXB takes the 4th component of the source vector automatically, as d3d. Nothing more to do */
        shader_addline(out, "TXB%s %s, %s, texture[%u], %s;\n", sat, sample_dst, coord_reg, sampler_idx, target);
    }
    else if (flags & TEX_PROJ)
    {
        shader_addline(out, "TXP%s %s, %s, texture[%u], %s;\n", sat, sample_dst, coord_reg, sampler_idx, target);
    }
    else if (scale_np2)
    {
        /* Scale the coordinates by the matching half of the np2fixup
         * constant before sampling. */
        const unsigned char idx = ctx_priv->cur_np2fixup_info->super.idx[sampler_idx];

        shader_addline(out, "MUL TA, np2fixup[%u].%s, %s;\n", idx >> 1,
                (idx % 2) ? "zwxy" : "xyzw", coord_reg);
        shader_addline(out, "TEX%s %s, TA, texture[%u], %s;\n", sat, sample_dst, sampler_idx, target);
    }
    else
    {
        shader_addline(out, "TEX%s %s, %s, texture[%u], %s;\n", sat, sample_dst, coord_reg, sampler_idx, target);
    }

    if (is_ps)
    {
        gen_color_correction(out, dst_str, sample_dst,
                arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_ONE),
                arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_TWO),
                ctx_priv->cur_ps_args->super.color_fixup[sampler_idx], fixup_masks);
    }
}

1536
static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins,
1537 1538
        const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr)
{
1539
    /* Generate a line that does the input modifier computation and return the input register to use */
1540
    BOOL is_color = FALSE, insert_line;
1541 1542
    char regstr[256];
    char swzstr[20];
1543
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1544
    struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
1545 1546
    const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
    const char *two = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_TWO);
1547 1548

    /* Assume a new line will be added */
1549
    insert_line = TRUE;
1550 1551

    /* Get register name */
1552
    shader_arb_get_register_name(ins, &src->reg, regstr, &is_color);
1553
    shader_arb_get_swizzle(src, is_color, swzstr);
1554

1555 1556
    switch (src->modifiers)
    {
1557
    case WINED3DSPSM_NONE:
1558
        sprintf(outregstr, "%s%s", regstr, swzstr);
1559
        insert_line = FALSE;
1560
        break;
1561
    case WINED3DSPSM_NEG:
1562
        sprintf(outregstr, "-%s%s", regstr, swzstr);
1563
        insert_line = FALSE;
1564
        break;
1565
    case WINED3DSPSM_BIAS:
1566 1567
        shader_addline(buffer, "ADD T%c, %s, -coefdiv.x;\n", 'A' + tmpreg, regstr);
        break;
1568
    case WINED3DSPSM_BIASNEG:
1569 1570
        shader_addline(buffer, "ADD T%c, -%s, coefdiv.x;\n", 'A' + tmpreg, regstr);
        break;
1571
    case WINED3DSPSM_SIGN:
1572
        shader_addline(buffer, "MAD T%c, %s, %s, -%s;\n", 'A' + tmpreg, regstr, two, one);
1573
        break;
1574
    case WINED3DSPSM_SIGNNEG:
1575
        shader_addline(buffer, "MAD T%c, %s, -%s, %s;\n", 'A' + tmpreg, regstr, two, one);
1576
        break;
1577
    case WINED3DSPSM_COMP:
1578
        shader_addline(buffer, "SUB T%c, %s, %s;\n", 'A' + tmpreg, one, regstr);
1579
        break;
1580
    case WINED3DSPSM_X2:
1581 1582
        shader_addline(buffer, "ADD T%c, %s, %s;\n", 'A' + tmpreg, regstr, regstr);
        break;
1583
    case WINED3DSPSM_X2NEG:
1584 1585
        shader_addline(buffer, "ADD T%c, -%s, -%s;\n", 'A' + tmpreg, regstr, regstr);
        break;
1586
    case WINED3DSPSM_DZ:
1587 1588 1589
        shader_addline(buffer, "RCP T%c, %s.z;\n", 'A' + tmpreg, regstr);
        shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg);
        break;
1590
    case WINED3DSPSM_DW:
1591 1592
        shader_addline(buffer, "RCP T%c, %s.w;\n", 'A' + tmpreg, regstr);
        shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg);
1593 1594
        break;
    case WINED3DSPSM_ABS:
1595 1596
        if(ctx->target_version >= NV2) {
            sprintf(outregstr, "|%s%s|", regstr, swzstr);
1597
            insert_line = FALSE;
1598 1599 1600
        } else {
            shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr);
        }
1601 1602
        break;
    case WINED3DSPSM_ABSNEG:
1603 1604 1605 1606 1607 1608
        if(ctx->target_version >= NV2) {
            sprintf(outregstr, "-|%s%s|", regstr, swzstr);
        } else {
            shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr);
            sprintf(outregstr, "-T%c%s", 'A' + tmpreg, swzstr);
        }
1609
        insert_line = FALSE;
1610 1611 1612
        break;
    default:
        sprintf(outregstr, "%s%s", regstr, swzstr);
1613
        insert_line = FALSE;
1614 1615 1616 1617 1618 1619 1620
    }

    /* Return modified or original register, with swizzle */
    if (insert_line)
        sprintf(outregstr, "T%c%s", 'A' + tmpreg, swzstr);
}

1621
static void pshader_hw_bem(const struct wined3d_shader_instruction *ins)
1622
{
1623
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1624
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1625
    DWORD sampler_code = dst->reg.idx[0].offset;
1626 1627 1628
    char dst_name[50];
    char src_name[2][50];

1629
    shader_arb_get_dst_param(ins, dst, dst_name);
1630

1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641
    /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed
     *
     * Keep in mind that src_name[1] can be "TB" and src_name[0] can be "TA" because modifiers like _x2 are valid
     * with bem. So delay loading the first parameter until after the perturbation calculation which needs two
     * temps is done.
     */
    shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
    shader_addline(buffer, "SWZ TA, bumpenvmat%d, x, z, 0, 0;\n", sampler_code);
    shader_addline(buffer, "DP3 TC.r, TA, %s;\n", src_name[1]);
    shader_addline(buffer, "SWZ TA, bumpenvmat%d, y, w, 0, 0;\n", sampler_code);
    shader_addline(buffer, "DP3 TC.g, TA, %s;\n", src_name[1]);
1642

1643 1644
    shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
    shader_addline(buffer, "ADD %s, %s, TC;\n", dst_name, src_name[0]);
1645 1646
}

1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669
/* Return the source modifier that yields the negated value of what "mod"
 * yields.
 *
 * Modifiers that have a negated twin are swapped with it and *extra_char is
 * set to ' '. COMP, DZ and DW have no twin: they are returned unchanged and
 * *extra_char is set to '-', which the caller has to put in front of the
 * operand itself. Unknown modifiers are reported and returned unchanged. */
static DWORD negate_modifiers(DWORD mod, char *extra_char)
{
    *extra_char = ' ';

    switch (mod)
    {
        case WINED3DSPSM_NONE:
            return WINED3DSPSM_NEG;
        case WINED3DSPSM_NEG:
            return WINED3DSPSM_NONE;

        case WINED3DSPSM_BIAS:
            return WINED3DSPSM_BIASNEG;
        case WINED3DSPSM_BIASNEG:
            return WINED3DSPSM_BIAS;

        case WINED3DSPSM_SIGN:
            return WINED3DSPSM_SIGNNEG;
        case WINED3DSPSM_SIGNNEG:
            return WINED3DSPSM_SIGN;

        case WINED3DSPSM_X2:
            return WINED3DSPSM_X2NEG;
        case WINED3DSPSM_X2NEG:
            return WINED3DSPSM_X2;

        case WINED3DSPSM_ABS:
            return WINED3DSPSM_ABSNEG;
        case WINED3DSPSM_ABSNEG:
            return WINED3DSPSM_ABS;

        case WINED3DSPSM_COMP:
        case WINED3DSPSM_DZ:
        case WINED3DSPSM_DW:
            /* No negated counterpart exists; request an explicit minus sign. */
            *extra_char = '-';
            return mod;

        default:
            FIXME("Unknown modifier %u\n", mod);
            return mod;
    }
}

1670
static void pshader_hw_cnd(const struct wined3d_shader_instruction *ins)
1671
{
1672
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1673
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1674 1675
    char dst_name[50];
    char src_name[3][50];
1676 1677
    DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
            ins->ctx->reg_maps->shader_version.minor);
1678

1679
    shader_arb_get_dst_param(ins, dst, dst_name);
1680
    shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
1681

1682 1683
    if (shader_version <= WINED3D_SHADER_VERSION(1, 3) && ins->coissue
            && ins->dst->write_mask != WINED3DSP_WRITEMASK_3)
1684
    {
1685
        shader_addline(buffer, "MOV%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[1]);
1686 1687 1688
    }
    else
    {
1689 1690 1691 1692 1693 1694 1695
        struct wined3d_shader_src_param src0_copy = ins->src[0];
        char extra_neg;

        /* src0 may have a negate srcmod set, so we can't blindly add "-" to the name */
        src0_copy.modifiers = negate_modifiers(src0_copy.modifiers, &extra_neg);

        shader_arb_get_src_param(ins, &src0_copy, 0, src_name[0]);
1696
        shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
1697
        shader_addline(buffer, "ADD TA, %c%s, coefdiv.x;\n", extra_neg, src_name[0]);
1698 1699
        shader_addline(buffer, "CMP%s %s, TA, %s, %s;\n", shader_arb_get_modifier(ins),
                dst_name, src_name[1], src_name[2]);
1700
    }
1701 1702
}

1703
static void pshader_hw_cmp(const struct wined3d_shader_instruction *ins)
1704
{
1705
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1706
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1707 1708 1709
    char dst_name[50];
    char src_name[3][50];

1710
    shader_arb_get_dst_param(ins, dst, dst_name);
1711 1712

    /* Generate input register names (with modifiers) */
1713 1714 1715
    shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
    shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
    shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
1716

1717 1718
    shader_addline(buffer, "CMP%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins),
            dst_name, src_name[0], src_name[2], src_name[1]);
1719 1720
}

1721 1722
/** Process the WINED3DSIO_DP2ADD instruction in ARB.
 * dst = dot2(src0, src1) + src2 */
1723
static void pshader_hw_dp2add(const struct wined3d_shader_instruction *ins)
1724
{
1725
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1726
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1727 1728
    char dst_name[50];
    char src_name[3][50];
1729
    struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
1730

1731
    shader_arb_get_dst_param(ins, dst, dst_name);
1732 1733
    shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
    shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
1734

1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773
    if(ctx->target_version >= NV3)
    {
        /* GL_NV_fragment_program2 has a 1:1 matching instruction */
        shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
        shader_addline(buffer, "DP2A%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins),
                       dst_name, src_name[0], src_name[1], src_name[2]);
    }
    else if(ctx->target_version >= NV2)
    {
        /* dst.x = src2.?, src0.x, src1.x + src0.y * src1.y
         * dst.y = src2.?, src0.x, src1.z + src0.y * src1.w
         * dst.z = src2.?, src0.x, src1.x + src0.y * src1.y
         * dst.z = src2.?, src0.x, src1.z + src0.y * src1.w
         *
         * Make sure that src1.zw = src1.xy, then we get a classic dp2add
         *
         * .xyxy and other swizzles that we could get with this are not valid in
         * plain ARBfp, but luckily the NV extension grammar lifts this limitation.
         */
        struct wined3d_shader_src_param tmp_param = ins->src[1];
        DWORD swizzle = tmp_param.swizzle & 0xf; /* Selects .xy */
        tmp_param.swizzle = swizzle | (swizzle << 4); /* Creates .xyxy */

        shader_arb_get_src_param(ins, &tmp_param, 1, src_name[1]);

        shader_addline(buffer, "X2D%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins),
                       dst_name, src_name[2], src_name[0], src_name[1]);
    }
    else
    {
        shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
        /* Emulate a DP2 with a DP3 and 0.0. Don't use the dest as temp register, it could be src[1] or src[2]
        * src_name[0] can be TA, but TA is a private temp for modifiers, so it is save to overwrite
        */
        shader_addline(buffer, "MOV TA, %s;\n", src_name[0]);
        shader_addline(buffer, "MOV TA.z, 0.0;\n");
        shader_addline(buffer, "DP3 TA, TA, %s;\n", src_name[1]);
        shader_addline(buffer, "ADD%s %s, TA, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[2]);
    }
1774 1775
}

1776
/* Map the opcode 1-to-1 to the GL code */
1777
static void shader_hw_map2gl(const struct wined3d_shader_instruction *ins)
1778
{
1779
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1780
    const char *instruction;
1781
    char arguments[256], dst_str[50];
1782
    unsigned int i;
1783
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1784

1785
    switch (ins->handler_idx)
1786
    {
1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803
        case WINED3DSIH_ABS: instruction = "ABS"; break;
        case WINED3DSIH_ADD: instruction = "ADD"; break;
        case WINED3DSIH_CRS: instruction = "XPD"; break;
        case WINED3DSIH_DP3: instruction = "DP3"; break;
        case WINED3DSIH_DP4: instruction = "DP4"; break;
        case WINED3DSIH_DST: instruction = "DST"; break;
        case WINED3DSIH_FRC: instruction = "FRC"; break;
        case WINED3DSIH_LIT: instruction = "LIT"; break;
        case WINED3DSIH_LRP: instruction = "LRP"; break;
        case WINED3DSIH_MAD: instruction = "MAD"; break;
        case WINED3DSIH_MAX: instruction = "MAX"; break;
        case WINED3DSIH_MIN: instruction = "MIN"; break;
        case WINED3DSIH_MOV: instruction = "MOV"; break;
        case WINED3DSIH_MUL: instruction = "MUL"; break;
        case WINED3DSIH_SGE: instruction = "SGE"; break;
        case WINED3DSIH_SLT: instruction = "SLT"; break;
        case WINED3DSIH_SUB: instruction = "SUB"; break;
1804
        case WINED3DSIH_MOVA:instruction = "ARR"; break;
1805
        case WINED3DSIH_DSX: instruction = "DDX"; break;
1806
        default: instruction = "";
1807
            FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
1808 1809 1810
            break;
    }

1811 1812 1813 1814 1815 1816 1817 1818 1819
    /* Note that shader_arb_add_dst_param() adds spaces. */
    arguments[0] = '\0';
    shader_arb_get_dst_param(ins, dst, dst_str);
    for (i = 0; i < ins->src_count; ++i)
    {
        char operand[100];
        strcat(arguments, ", ");
        shader_arb_get_src_param(ins, &ins->src[i], i, operand);
        strcat(arguments, operand);
1820
    }
1821
    shader_addline(buffer, "%s%s %s%s;\n", instruction, shader_arb_get_modifier(ins), dst_str, arguments);
1822 1823
}

1824
/* Handler for instructions that need no ARB code: emits nothing. */
static void shader_hw_nop(const struct wined3d_shader_instruction *ins)
{
}
1825

1826 1827 1828 1829 1830
/* Build a swizzle that replicates one component of "swizzle" into all four
 * positions. A swizzle stores one 2-bit selector per position; multiplying
 * the chosen selector by 0x55 (binary 01010101) copies it into each 2-bit
 * slot. */
static DWORD shader_arb_select_component(DWORD swizzle, DWORD component)
{
    DWORD selector = (swizzle >> 2 * component) & 0x3;

    return selector * 0x55;
}

1831
static void shader_hw_mov(const struct wined3d_shader_instruction *ins)
1832
{
1833
    const struct wined3d_shader *shader = ins->ctx->shader;
1834
    const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
1835
    BOOL pshader = shader_is_pshader_version(reg_maps->shader_version.type);
1836
    struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
1837 1838 1839
    const char *zero = arb_get_helper_value(reg_maps->shader_version.type, ARB_ZERO);
    const char *one = arb_get_helper_value(reg_maps->shader_version.type, ARB_ONE);
    const char *two = arb_get_helper_value(reg_maps->shader_version.type, ARB_TWO);
1840

1841
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1842
    char src0_param[256];
1843

1844 1845
    if (ins->handler_idx == WINED3DSIH_MOVA)
    {
1846
        const struct arb_vshader_private *shader_data = shader->backend_data;
1847
        char write_mask[6];
1848
        const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET);
1849

1850 1851 1852 1853
        if(ctx->target_version >= NV2) {
            shader_hw_map2gl(ins);
            return;
        }
1854
        shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param);
1855
        shader_arb_get_write_mask(ins, &ins->dst[0], write_mask);
1856

1857 1858 1859 1860 1861 1862 1863
        /* This implements the mova formula used in GLSL. The first two instructions
         * prepare the sign() part. Note that it is fine to have my_sign(0.0) = 1.0
         * in this case:
         * mova A0.x, 0.0
         *
         * A0.x = arl(floor(abs(0.0) + 0.5) * 1.0) = floor(0.5) = 0.0 since arl does a floor
         *
1864 1865
         * The ARL is performed when A0 is used - the requested component is read from A0_SHADOW into
         * A0.x. We can use the overwritten component of A0_shadow as temporary storage for the sign.
1866
         */
1867 1868
        shader_addline(buffer, "SGE A0_SHADOW%s, %s, %s;\n", write_mask, src0_param, zero);
        shader_addline(buffer, "MAD A0_SHADOW%s, A0_SHADOW, %s, -%s;\n", write_mask, two, one);
1869 1870

        shader_addline(buffer, "ABS TA%s, %s;\n", write_mask, src0_param);
1871
        shader_addline(buffer, "ADD TA%s, TA, rel_addr_const.x;\n", write_mask);
1872
        shader_addline(buffer, "FLR TA%s, TA;\n", write_mask);
1873
        if (shader_data->rel_offset)
1874
        {
1875
            shader_addline(buffer, "ADD TA%s, TA, %s;\n", write_mask, offset);
1876
        }
1877 1878 1879
        shader_addline(buffer, "MUL A0_SHADOW%s, TA, A0_SHADOW;\n", write_mask);

        ((struct shader_arb_ctx_priv *)ins->ctx->backend_data)->addr_reg[0] = '\0';
1880 1881 1882
    }
    else if (reg_maps->shader_version.major == 1
          && !shader_is_pshader_version(reg_maps->shader_version.type)
1883 1884
          && ins->dst[0].reg.type == WINED3DSPR_ADDR)
    {
1885
        const struct arb_vshader_private *shader_data = shader->backend_data;
1886
        src0_param[0] = '\0';
1887

1888
        if (shader_data->rel_offset && ctx->target_version == ARB)
1889
        {
1890
            const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET);
1891
            shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param);
1892
            shader_addline(buffer, "ADD TA.x, %s, %s;\n", src0_param, offset);
1893
            shader_addline(buffer, "ARL A0.x, TA.x;\n");
1894 1895 1896 1897 1898
        }
        else
        {
            /* Apple's ARB_vertex_program implementation does not accept an ARL source argument
             * with more than one component. Thus replicate the first source argument over all
1899
             * 4 components. For example, .xyzw -> .x (or better: .xxxx), .zwxy -> .z, etc) */
1900
            struct wined3d_shader_src_param tmp_src = ins->src[0];
1901
            tmp_src.swizzle = shader_arb_select_component(tmp_src.swizzle, 0);
1902
            shader_arb_get_src_param(ins, &tmp_src, 0, src0_param);
1903 1904 1905
            shader_addline(buffer, "ARL A0.x, %s;\n", src0_param);
        }
    }
1906
    else if (ins->dst[0].reg.type == WINED3DSPR_COLOROUT && !ins->dst[0].reg.idx[0].offset && pshader)
1907
    {
1908
        if (ctx->ps_post_process && shader->u.ps.color0_mov)
1909
        {
1910
            shader_addline(buffer, "#mov handled in srgb write or fog code\n");
1911 1912 1913 1914
            return;
        }
        shader_hw_map2gl(ins);
    }
1915 1916
    else
    {
1917
        shader_hw_map2gl(ins);
1918 1919 1920
    }
}

1921
static void pshader_hw_texkill(const struct wined3d_shader_instruction *ins)
1922
{
1923
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1924
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1925 1926 1927
    char reg_dest[40];

    /* No swizzles are allowed in d3d's texkill. PS 1.x ignores the 4th component as documented,
1928
     * but >= 2.0 honors it (undocumented, but tested by the d3d9 testsuite)
1929
     */
1930
    shader_arb_get_dst_param(ins, dst, reg_dest);
1931

1932
    if (ins->ctx->reg_maps->shader_version.major >= 2)
1933
    {
1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955
        const char *kilsrc = "TA";
        BOOL is_color;

        shader_arb_get_register_name(ins, &dst->reg, reg_dest, &is_color);
        if(dst->write_mask == WINED3DSP_WRITEMASK_ALL)
        {
            kilsrc = reg_dest;
        }
        else
        {
            /* Sigh. KIL doesn't support swizzles/writemasks. KIL passes a writemask, but ".xy" for example
             * is not valid as a swizzle in ARB (needs ".xyyy"). Use SWZ to load the register properly, and set
             * masked out components to 0(won't kill)
             */
            char x = '0', y = '0', z = '0', w = '0';
            if(dst->write_mask & WINED3DSP_WRITEMASK_0) x = 'x';
            if(dst->write_mask & WINED3DSP_WRITEMASK_1) y = 'y';
            if(dst->write_mask & WINED3DSP_WRITEMASK_2) z = 'z';
            if(dst->write_mask & WINED3DSP_WRITEMASK_3) w = 'w';
            shader_addline(buffer, "SWZ TA, %s, %c, %c, %c, %c;\n", reg_dest, x, y, z, w);
        }
        shader_addline(buffer, "KIL %s;\n", kilsrc);
1956 1957 1958
    }
    else
    {
1959 1960
        /* ARB fp doesn't like swizzles on the parameter of the KIL instruction. To mask the 4th component,
         * copy the register into our general purpose TMP variable, overwrite .w and pass TMP to KIL
1961 1962 1963
         *
         * ps_1_3 shaders use the texcoord incarnation of the Tx register. ps_1_4 shaders can use the same,
         * or pass in any temporary register(in shader phase 2)
1964
         */
1965 1966 1967
        if (ins->ctx->reg_maps->shader_version.minor <= 3)
            sprintf(reg_dest, "fragment.texcoord[%u]", dst->reg.idx[0].offset);
        else
1968
            shader_arb_get_dst_param(ins, dst, reg_dest);
1969 1970
        shader_addline(buffer, "SWZ TA, %s, x, y, z, 1;\n", reg_dest);
        shader_addline(buffer, "KIL TA;\n");
1971 1972 1973
    }
}

1974
static void pshader_hw_tex(const struct wined3d_shader_instruction *ins)
1975
{
1976
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
1977
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1978 1979
    DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
            ins->ctx->reg_maps->shader_version.minor);
1980
    struct wined3d_shader_src_param src;
1981 1982 1983 1984

    char reg_dest[40];
    char reg_coord[40];
    DWORD reg_sampler_code;
1985
    WORD myflags = 0;
1986
    BOOL swizzle_coord = FALSE;
1987 1988

    /* All versions have a destination register */
1989
    shader_arb_get_dst_param(ins, dst, reg_dest);
1990

1991
    /* 1.0-1.4: Use destination register number as texture code.
1992
       2.0+: Use provided sampler number as texture code. */
1993
    if (shader_version < WINED3D_SHADER_VERSION(2,0))
1994
        reg_sampler_code = dst->reg.idx[0].offset;
1995
    else
1996
        reg_sampler_code = ins->src[1].reg.idx[0].offset;
1997 1998

    /* 1.0-1.3: Use the texcoord varying.
1999
       1.4+: Use provided coordinate source register. */
2000
    if (shader_version < WINED3D_SHADER_VERSION(1,4))
2001
        sprintf(reg_coord, "fragment.texcoord[%u]", reg_sampler_code);
2002 2003 2004 2005 2006 2007 2008
    else {
        /* TEX is the only instruction that can handle DW and DZ natively */
        src = ins->src[0];
        if(src.modifiers == WINED3DSPSM_DW) src.modifiers = WINED3DSPSM_NONE;
        if(src.modifiers == WINED3DSPSM_DZ) src.modifiers = WINED3DSPSM_NONE;
        shader_arb_get_src_param(ins, &src, 0, reg_coord);
    }
2009 2010

    /* projection flag:
2011
     * 1.1, 1.2, 1.3: Use WINED3D_TSS_TEXTURETRANSFORMFLAGS
2012 2013 2014 2015 2016 2017
     * 1.4: Use WINED3DSPSM_DZ or WINED3DSPSM_DW on src[0]
     * 2.0+: Use WINED3DSI_TEXLD_PROJECT on the opcode
     */
    if (shader_version < WINED3D_SHADER_VERSION(1,4))
    {
        DWORD flags = 0;
2018
        if (reg_sampler_code < WINED3D_MAX_TEXTURES)
2019 2020
            flags = priv->cur_ps_args->super.tex_transform >> reg_sampler_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT;
        if (flags & WINED3D_PSARGS_PROJECTED)
2021
        {
2022
            myflags |= TEX_PROJ;
2023 2024 2025
            if ((flags & ~WINED3D_PSARGS_PROJECTED) == WINED3D_TTFF_COUNT3)
                swizzle_coord = TRUE;
        }
2026 2027 2028
    }
    else if (shader_version < WINED3D_SHADER_VERSION(2,0))
    {
2029 2030 2031
        enum wined3d_shader_src_modifier src_mod = ins->src[0].modifiers;
        if (src_mod == WINED3DSPSM_DZ)
        {
2032
            swizzle_coord = TRUE;
2033
            myflags |= TEX_PROJ;
2034
        } else if(src_mod == WINED3DSPSM_DW) {
2035
            myflags |= TEX_PROJ;
2036 2037
        }
    } else {
2038 2039
        if (ins->flags & WINED3DSI_TEXLD_PROJECT) myflags |= TEX_PROJ;
        if (ins->flags & WINED3DSI_TEXLD_BIAS) myflags |= TEX_BIAS;
2040
    }
2041 2042 2043 2044 2045 2046 2047 2048 2049

    if (swizzle_coord)
    {
        /* TXP cannot handle DZ natively, so move the z coordinate to .w.
         * reg_coord is a read-only varying register, so we need a temp reg */
        shader_addline(ins->ctx->buffer, "SWZ TA, %s, x, y, z, z;\n", reg_coord);
        strcpy(reg_coord, "TA");
    }

2050
    shader_hw_sample(ins, reg_sampler_code, reg_dest, reg_coord, myflags, NULL, NULL);
2051 2052
}

2053
static void pshader_hw_texcoord(const struct wined3d_shader_instruction *ins)
2054
{
2055
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2056
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2057 2058
    DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
            ins->ctx->reg_maps->shader_version.minor);
2059
    char dst_str[50];
2060

2061
    if (shader_version < WINED3D_SHADER_VERSION(1,4))
2062
    {
2063
        DWORD reg = dst->reg.idx[0].offset;
2064 2065 2066

        shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
        shader_addline(buffer, "MOV_SAT %s, fragment.texcoord[%u];\n", dst_str, reg);
2067
    } else {
2068 2069
        char reg_src[40];

2070
        shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src);
2071 2072
        shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
        shader_addline(buffer, "MOV %s, %s;\n", dst_str, reg_src);
2073
    }
2074 2075
}

2076
static void pshader_hw_texreg2ar(const struct wined3d_shader_instruction *ins)
2077
{
2078
     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2079
     DWORD flags = 0;
2080

2081
     DWORD reg1 = ins->dst[0].reg.idx[0].offset;
2082
     char dst_str[50];
2083
     char src_str[50];
2084

2085
     /* Note that texreg2ar treats Tx as a temporary register, not as a varying */
2086
     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2087
     shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
2088 2089 2090
     /* Move .x first in case src_str is "TA" */
     shader_addline(buffer, "MOV TA.y, %s.x;\n", src_str);
     shader_addline(buffer, "MOV TA.x, %s.w;\n", src_str);
2091
     if (reg1 < WINED3D_MAX_TEXTURES)
2092 2093 2094 2095 2096
     {
         struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
         flags = priv->cur_ps_args->super.tex_transform >> reg1 * WINED3D_PSARGS_TEXTRANSFORM_SHIFT;
     }
     shader_hw_sample(ins, reg1, dst_str, "TA", flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2097 2098
}

2099
static void pshader_hw_texreg2gb(const struct wined3d_shader_instruction *ins)
2100
{
2101
     struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2102

2103
     DWORD reg1 = ins->dst[0].reg.idx[0].offset;
2104
     char dst_str[50];
2105
     char src_str[50];
2106

2107
     /* Note that texreg2gb treats Tx as a temporary register, not as a varying */
2108
     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2109
     shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
2110 2111
     shader_addline(buffer, "MOV TA.x, %s.y;\n", src_str);
     shader_addline(buffer, "MOV TA.y, %s.z;\n", src_str);
2112
     shader_hw_sample(ins, reg1, dst_str, "TA", 0, NULL, NULL);
2113 2114
}

2115
static void pshader_hw_texreg2rgb(const struct wined3d_shader_instruction *ins)
2116
{
2117
    DWORD reg1 = ins->dst[0].reg.idx[0].offset;
2118
    char dst_str[50];
2119 2120
    char src_str[50];

2121
    /* Note that texreg2rg treats Tx as a temporary register, not as a varying */
2122
    shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2123
    shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
2124
    shader_hw_sample(ins, reg1, dst_str, src_str, 0, NULL, NULL);
2125 2126
}

2127
static void pshader_hw_texbem(const struct wined3d_shader_instruction *ins)
2128
{
2129
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2130
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2131
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2132
    char reg_coord[40], dst_reg[50], src_reg[50];
2133 2134
    DWORD reg_dest_code;

2135 2136 2137
    /* All versions have a destination register. The Tx where the texture coordinates come
     * from is the varying incarnation of the texture register
     */
2138
    reg_dest_code = dst->reg.idx[0].offset;
2139
    shader_arb_get_dst_param(ins, &ins->dst[0], dst_reg);
2140
    shader_arb_get_src_param(ins, &ins->src[0], 0, src_reg);
2141
    sprintf(reg_coord, "fragment.texcoord[%u]", reg_dest_code);
2142

2143 2144
    /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed
     * The Tx in which the perturbation map is stored is the tempreg incarnation of the texture register
2145 2146 2147 2148 2149 2150 2151
     *
     * GL_NV_fragment_program_option could handle this in one instruction via X2D:
     * X2D TA.xy, fragment.texcoord, T%u, bumpenvmat%u.xzyw
     *
     * However, the NV extensions are never enabled for <= 2.0 shaders because of the performance penalty that
     * comes with it, and texbem is an 1.x only instruction. No 1.x instruction forces us to enable the NV
     * extension.
2152
     */
2153
    shader_addline(buffer, "SWZ TB, bumpenvmat%d, x, z, 0, 0;\n", reg_dest_code);
2154
    shader_addline(buffer, "DP3 TA.x, TB, %s;\n", src_reg);
2155
    shader_addline(buffer, "SWZ TB, bumpenvmat%d, y, w, 0, 0;\n", reg_dest_code);
2156
    shader_addline(buffer, "DP3 TA.y, TB, %s;\n", src_reg);
2157

2158 2159 2160
    /* with projective textures, texbem only divides the static texture coord, not the displacement,
     * so we can't let the GL handle this.
     */
2161 2162
    if ((priv->cur_ps_args->super.tex_transform >> reg_dest_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT)
            & WINED3D_PSARGS_PROJECTED)
2163
    {
2164 2165 2166
        shader_addline(buffer, "RCP TB.w, %s.w;\n", reg_coord);
        shader_addline(buffer, "MUL TB.xy, %s, TB.w;\n", reg_coord);
        shader_addline(buffer, "ADD TA.xy, TA, TB;\n");
2167
    } else {
2168
        shader_addline(buffer, "ADD TA.xy, TA, %s;\n", reg_coord);
2169
    }
2170

2171
    shader_hw_sample(ins, reg_dest_code, dst_reg, "TA", 0, NULL, NULL);
2172

2173 2174
    if (ins->handler_idx == WINED3DSIH_TEXBEML)
    {
2175 2176 2177
        /* No src swizzles are allowed, so this is ok */
        shader_addline(buffer, "MAD TA, %s.z, luminance%d.x, luminance%d.y;\n",
                       src_reg, reg_dest_code, reg_dest_code);
2178
        shader_addline(buffer, "MUL %s, %s, TA;\n", dst_reg, dst_reg);
2179
    }
2180 2181
}

2182
static void pshader_hw_texm3x2pad(const struct wined3d_shader_instruction *ins)
2183
{
2184
    DWORD reg = ins->dst[0].reg.idx[0].offset;
2185
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2186 2187 2188
    char src0_name[50], dst_name[50];
    BOOL is_color;
    struct wined3d_shader_register tmp_reg = ins->dst[0].reg;
2189

2190
    shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2191 2192 2193
    /* The next instruction will be a texm3x2tex or texm3x2depth that writes to the uninitialized
     * T<reg+1> register. Use this register to store the calculated vector
     */
2194
    tmp_reg.idx[0].offset = reg + 1;
2195 2196
    shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color);
    shader_addline(buffer, "DP3 %s.x, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name);
2197 2198
}

2199
static void pshader_hw_texm3x2tex(const struct wined3d_shader_instruction *ins)
2200
{
2201
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2202
    DWORD flags;
2203
    DWORD reg = ins->dst[0].reg.idx[0].offset;
2204
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2205
    char dst_str[50];
2206
    char src0_name[50];
2207
    char dst_reg[50];
2208
    BOOL is_color;
2209 2210

    /* We know that we're writing to the uninitialized T<reg> register, so use it for temporary storage */
2211
    shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
2212

2213
    shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2214
    shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2215
    shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
2216
    flags = reg < WINED3D_MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
2217
    shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2218 2219
}

2220
static void pshader_hw_texm3x3pad(const struct wined3d_shader_instruction *ins)
2221
{
2222
    struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
2223
    DWORD reg = ins->dst[0].reg.idx[0].offset;
2224
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2225 2226 2227
    char src0_name[50], dst_name[50];
    struct wined3d_shader_register tmp_reg = ins->dst[0].reg;
    BOOL is_color;
2228 2229 2230 2231 2232

    /* There are always 2 texm3x3pad instructions followed by one texm3x3[tex,vspec, ...] instruction, with
     * incrementing ins->dst[0].register_idx numbers. So the pad instruction already knows the final destination
     * register, and this register is uninitialized(otherwise the assembler complains that it is 'redeclared')
     */
2233
    tmp_reg.idx[0].offset = reg + 2 - tex_mx->current_row;
2234
    shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color);
2235

2236
    shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2237
    shader_addline(buffer, "DP3 %s.%c, fragment.texcoord[%u], %s;\n",
2238 2239
                   dst_name, 'x' + tex_mx->current_row, reg, src0_name);
    tex_mx->texcoord_w[tex_mx->current_row++] = reg;
2240 2241
}

2242
static void pshader_hw_texm3x3tex(const struct wined3d_shader_instruction *ins)
2243
{
2244
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2245
    struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
2246
    DWORD flags;
2247
    DWORD reg = ins->dst[0].reg.idx[0].offset;
2248
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2249
    char dst_str[50];
2250 2251
    char src0_name[50], dst_name[50];
    BOOL is_color;
2252

2253
    shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
2254
    shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2255
    shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name);
2256

2257
    /* Sample the texture using the calculated coordinates */
2258
    shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2259
    flags = reg < WINED3D_MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
2260
    shader_hw_sample(ins, reg, dst_str, dst_name, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2261
    tex_mx->current_row = 0;
2262 2263
}

2264
static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins)
2265
{
2266
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2267
    struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
2268
    DWORD flags;
2269
    DWORD reg = ins->dst[0].reg.idx[0].offset;
2270
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2271
    char dst_str[50];
2272
    char src0_name[50];
2273
    char dst_reg[50];
2274
    BOOL is_color;
2275

2276 2277 2278
    /* Get the dst reg without writemask strings. We know this register is uninitialized, so we can use all
     * components for temporary data storage
     */
2279
    shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
2280
    shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2281
    shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
2282 2283

    /* Construct the eye-ray vector from w coordinates */
2284 2285
    shader_addline(buffer, "MOV TB.x, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[0]);
    shader_addline(buffer, "MOV TB.y, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[1]);
2286
    shader_addline(buffer, "MOV TB.z, fragment.texcoord[%u].w;\n", reg);
2287

2288 2289
    /* Calculate reflection vector
     */
2290
    shader_addline(buffer, "DP3 %s.w, %s, TB;\n", dst_reg, dst_reg);
2291
    /* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */
2292
    shader_addline(buffer, "DP3 TB.w, %s, %s;\n", dst_reg, dst_reg);
2293
    shader_addline(buffer, "RCP TB.w, TB.w;\n");
2294 2295 2296
    shader_addline(buffer, "MUL %s.w, %s.w, TB.w;\n", dst_reg, dst_reg);
    shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
    shader_addline(buffer, "MAD %s, coefmul.x, %s, -TB;\n", dst_reg, dst_reg);
2297

2298
    /* Sample the texture using the calculated coordinates */
2299
    shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2300
    flags = reg < WINED3D_MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
2301
    shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2302
    tex_mx->current_row = 0;
2303 2304
}

2305
static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins)
2306
{
2307
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2308
    struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
2309
    DWORD flags;
2310
    DWORD reg = ins->dst[0].reg.idx[0].offset;
2311
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2312
    char dst_str[50];
2313
    char src0_name[50];
2314
    char src1_name[50];
2315
    char dst_reg[50];
2316
    BOOL is_color;
2317

2318 2319
    shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
    shader_arb_get_src_param(ins, &ins->src[0], 1, src1_name);
2320 2321
    shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
    /* Note: dst_reg.xy is input here, generated by two texm3x3pad instructions */
2322
    shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
2323

2324 2325
    /* Calculate reflection vector.
     *
2326 2327 2328
     *                   dot(N, E)
     * dst_reg.xyz = 2 * --------- * N - E
     *                   dot(N, N)
2329 2330 2331
     *
     * Which normalizes the normal vector
     */
2332 2333
    shader_addline(buffer, "DP3 %s.w, %s, %s;\n", dst_reg, dst_reg, src1_name);
    shader_addline(buffer, "DP3 TC.w, %s, %s;\n", dst_reg, dst_reg);
2334
    shader_addline(buffer, "RCP TC.w, TC.w;\n");
2335 2336 2337
    shader_addline(buffer, "MUL %s.w, %s.w, TC.w;\n", dst_reg, dst_reg);
    shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
    shader_addline(buffer, "MAD %s, coefmul.x, %s, -%s;\n", dst_reg, dst_reg, src1_name);
2338

2339
    /* Sample the texture using the calculated coordinates */
2340
    shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2341
    flags = reg < WINED3D_MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
2342
    shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2343
    tex_mx->current_row = 0;
2344 2345
}

2346
static void pshader_hw_texdepth(const struct wined3d_shader_instruction *ins)
2347
{
2348
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2349
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2350
    char dst_name[50];
2351 2352
    const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
    const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
2353 2354

    /* texdepth has an implicit destination, the fragment depth value. It's only parameter,
2355 2356
     * which is essentially an input, is the destination register because it is the first
     * parameter. According to the msdn, this must be register r5, but let's keep it more flexible
2357
     * here(writemasks/swizzles are not valid on texdepth)
2358
     */
2359
    shader_arb_get_dst_param(ins, dst, dst_name);
2360 2361 2362 2363

    /* According to the msdn, the source register(must be r5) is unusable after
     * the texdepth instruction, so we're free to modify it
     */
2364
    shader_addline(buffer, "MIN %s.y, %s.y, %s;\n", dst_name, dst_name, one);
2365 2366 2367 2368 2369

    /* How to deal with the special case dst_name.g == 0? if r != 0, then
     * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct
     * result. But if r = 0.0, then 0 * inf = 0, which is incorrect.
     */
2370
    shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name);
2371
    shader_addline(buffer, "MUL TA.x, %s.x, %s.y;\n", dst_name, dst_name);
2372 2373
    shader_addline(buffer, "MIN TA.x, TA.x, %s;\n", one);
    shader_addline(buffer, "MAX result.depth, TA.x, %s;\n", zero);
2374 2375
}

2376 2377 2378
/** Process the WINED3DSIO_TEXDP3TEX instruction in ARB:
 * Take a 3-component dot product of the TexCoord[dstreg] and src,
 * then perform a 1D texture lookup from stage dstregnum, place into dst. */
2379
static void pshader_hw_texdp3tex(const struct wined3d_shader_instruction *ins)
2380
{
2381
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2382
    DWORD sampler_idx = ins->dst[0].reg.idx[0].offset;
2383
    char src0[50];
2384
    char dst_str[50];
2385

2386
    shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
2387 2388
    shader_addline(buffer, "MOV TB, 0.0;\n");
    shader_addline(buffer, "DP3 TB.x, fragment.texcoord[%u], %s;\n", sampler_idx, src0);
2389

2390
    shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2391
    shader_hw_sample(ins, sampler_idx, dst_str, "TB", 0 /* Only one coord, can't be projected */, NULL, NULL);
2392 2393 2394 2395
}

/** Process the WINED3DSIO_TEXDP3 instruction in ARB:
 * Take a 3-component dot product of the TexCoord[dstreg] and src. */
2396
static void pshader_hw_texdp3(const struct wined3d_shader_instruction *ins)
2397
{
2398
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2399 2400
    char src0[50];
    char dst_str[50];
2401
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2402 2403

    /* Handle output register */
2404
    shader_arb_get_dst_param(ins, dst, dst_str);
2405
    shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
2406
    shader_addline(buffer, "DP3 %s, fragment.texcoord[%u], %s;\n", dst_str, dst->reg.idx[0].offset, src0);
2407 2408 2409 2410
}

/** Process the WINED3DSIO_TEXM3X3 instruction in ARB
 * Perform the 3rd row of a 3x3 matrix multiply */
2411
static void pshader_hw_texm3x3(const struct wined3d_shader_instruction *ins)
2412
{
2413
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2414
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2415
    char dst_str[50], dst_name[50];
2416
    char src0[50];
2417
    BOOL is_color;
2418

2419
    shader_arb_get_dst_param(ins, dst, dst_str);
2420
    shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
2421
    shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
2422
    shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx[0].offset, src0);
2423
    shader_addline(buffer, "MOV %s, %s;\n", dst_str, dst_name);
2424 2425 2426 2427 2428 2429 2430
}

/** Process the WINED3DSIO_TEXM3X2DEPTH instruction in ARB:
 * Last row of a 3x2 matrix multiply, use the result to calculate the depth:
 * Calculate tmp0.y = TexCoord[dstreg] . src.xyz;  (tmp0.x has already been calculated)
 * depth = (tmp0.y == 0.0) ? 1.0 : tmp0.x / tmp0.y
 */
2431
static void pshader_hw_texm3x2depth(const struct wined3d_shader_instruction *ins)
2432
{
2433
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2434 2435 2436
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
    char src0[50], dst_name[50];
    BOOL is_color;
2437 2438
    const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
    const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
2439

2440
    shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
2441
    shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
2442
    shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx[0].offset, src0);
2443 2444 2445 2446 2447

    /* How to deal with the special case dst_name.g == 0? if r != 0, then
     * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct
     * result. But if r = 0.0, then 0 * inf = 0, which is incorrect.
     */
2448 2449
    shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name);
    shader_addline(buffer, "MUL %s.x, %s.x, %s.y;\n", dst_name, dst_name, dst_name);
2450 2451
    shader_addline(buffer, "MIN %s.x, %s.x, %s;\n", dst_name, dst_name, one);
    shader_addline(buffer, "MAX result.depth, %s.x, %s;\n", dst_name, zero);
2452 2453
}

2454
/** Handles transforming all WINED3DSIO_M?x? opcodes for
2455
    Vertex/Pixel shaders to ARB_vertex_program codes */
2456
static void shader_hw_mnxn(const struct wined3d_shader_instruction *ins)
2457
{
2458 2459
    int i;
    int nComponents = 0;
2460 2461
    struct wined3d_shader_dst_param tmp_dst = {{0}};
    struct wined3d_shader_src_param tmp_src[2] = {{{0}}};
2462
    struct wined3d_shader_instruction tmp_ins;
2463

2464
    memset(&tmp_ins, 0, sizeof(tmp_ins));
2465

2466
    /* Set constants for the temporary argument */
2467
    tmp_ins.ctx = ins->ctx;
2468
    tmp_ins.dst_count = 1;
2469
    tmp_ins.dst = &tmp_dst;
2470
    tmp_ins.src_count = 2;
2471
    tmp_ins.src = tmp_src;
2472

2473
    switch(ins->handler_idx)
2474
    {
2475 2476
        case WINED3DSIH_M4x4:
            nComponents = 4;
2477
            tmp_ins.handler_idx = WINED3DSIH_DP4;
2478 2479 2480
            break;
        case WINED3DSIH_M4x3:
            nComponents = 3;
2481
            tmp_ins.handler_idx = WINED3DSIH_DP4;
2482 2483 2484
            break;
        case WINED3DSIH_M3x4:
            nComponents = 4;
2485
            tmp_ins.handler_idx = WINED3DSIH_DP3;
2486 2487 2488
            break;
        case WINED3DSIH_M3x3:
            nComponents = 3;
2489
            tmp_ins.handler_idx = WINED3DSIH_DP3;
2490 2491 2492
            break;
        case WINED3DSIH_M3x2:
            nComponents = 2;
2493
            tmp_ins.handler_idx = WINED3DSIH_DP3;
2494 2495
            break;
        default:
2496
            FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
2497
            break;
2498 2499
    }

2500
    tmp_dst = ins->dst[0];
2501 2502
    tmp_src[0] = ins->src[0];
    tmp_src[1] = ins->src[1];
2503 2504
    for (i = 0; i < nComponents; ++i)
    {
2505
        tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i;
2506
        shader_hw_map2gl(&tmp_ins);
2507
        ++tmp_src[1].reg.idx[0].offset;
2508 2509 2510
    }
}

2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534
/* Map a source modifier to one that yields the absolute value of the source.
 *
 * For NONE, NEG, ABS and ABSNEG the result is simply WINED3DSPSM_ABS. Every
 * other known modifier is returned unchanged and *need_abs is set to TRUE,
 * telling the caller to apply a separate ABS to the modified value. Unknown
 * modifiers are reported and returned as-is with *need_abs left FALSE.
 */
static DWORD abs_modifier(DWORD mod, BOOL *need_abs)
{
    *need_abs = FALSE;

    switch (mod)
    {
        case WINED3DSPSM_NONE:
        case WINED3DSPSM_NEG:
        case WINED3DSPSM_ABS:
        case WINED3DSPSM_ABSNEG:
            return WINED3DSPSM_ABS;

        case WINED3DSPSM_BIAS:
        case WINED3DSPSM_BIASNEG:
        case WINED3DSPSM_SIGN:
        case WINED3DSPSM_SIGNNEG:
        case WINED3DSPSM_COMP:
        case WINED3DSPSM_X2:
        case WINED3DSPSM_X2NEG:
        case WINED3DSPSM_DZ:
        case WINED3DSPSM_DW:
            *need_abs = TRUE;
            return mod;
    }

    FIXME("Unknown modifier %u\n", mod);
    return mod;
}

2535
static void shader_hw_scalar_op(const struct wined3d_shader_instruction *ins)
2536
{
2537
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2538
    const char *instruction;
2539
    struct wined3d_shader_src_param src0_copy = ins->src[0];
2540
    BOOL need_abs = FALSE;
2541

2542
    char dst[50];
2543
    char src[50];
2544

2545
    switch(ins->handler_idx)
2546
    {
2547 2548
        case WINED3DSIH_RSQ:  instruction = "RSQ"; break;
        case WINED3DSIH_RCP:  instruction = "RCP"; break;
2549 2550 2551 2552 2553 2554 2555 2556 2557 2558
        case WINED3DSIH_EXPP:
            if (ins->ctx->reg_maps->shader_version.major < 2)
            {
                instruction = "EXP";
                break;
            }
            /* Drop through. */
        case WINED3DSIH_EXP:
            instruction = "EX2";
            break;
2559 2560
        case WINED3DSIH_LOG:
        case WINED3DSIH_LOGP:
2561 2562 2563
            /* The precision requirements suggest that LOGP matches ARBvp's LOG
             * instruction, but notice that the output of those instructions is
             * different. */
2564
            src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs);
2565
            instruction = "LG2";
2566
            break;
2567
        default: instruction = "";
2568
            FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
2569 2570 2571
            break;
    }

2572 2573 2574 2575
    /* Dx sdk says .x is used if no swizzle is given, but our test shows that
     * .w is used. */
    src0_copy.swizzle = shader_arb_select_component(src0_copy.swizzle, 3);

2576
    shader_arb_get_dst_param(ins, &ins->dst[0], dst); /* Destination */
2577
    shader_arb_get_src_param(ins, &src0_copy, 0, src);
2578

2579 2580 2581 2582 2583 2584 2585 2586 2587 2588
    if(need_abs)
    {
        shader_addline(buffer, "ABS TA.w, %s;\n", src);
        shader_addline(buffer, "%s%s %s, TA.w;\n", instruction, shader_arb_get_modifier(ins), dst);
    }
    else
    {
        shader_addline(buffer, "%s%s %s, %s;\n", instruction, shader_arb_get_modifier(ins), dst, src);
    }

2589 2590
}

2591
static void shader_hw_nrm(const struct wined3d_shader_instruction *ins)
2592
{
2593
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2594 2595
    char dst_name[50];
    char src_name[50];
2596 2597
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
    BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type);
2598
    const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
2599

2600
    shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
2601
    shader_arb_get_src_param(ins, &ins->src[0], 1 /* Use TB */, src_name);
2602

2603 2604
    /* In D3D, NRM of a vector with length zero returns zero. Catch this situation, as
     * otherwise NRM or RSQ would return NaN */
2605 2606
    if(pshader && priv->target_version >= NV3)
    {
2607 2608 2609 2610 2611
        /* GL_NV_fragment_program2's NRM needs protection against length zero vectors too
         *
         * TODO: Find out if DP3+NRM+MOV is really faster than DP3+RSQ+MUL
         */
        shader_addline(buffer, "DP3C TA, %s, %s;\n", src_name, src_name);
2612
        shader_addline(buffer, "NRM%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name);
2613 2614 2615 2616 2617 2618 2619 2620
        shader_addline(buffer, "MOV %s (EQ), %s;\n", dst_name, zero);
    }
    else if(priv->target_version >= NV2)
    {
        shader_addline(buffer, "DP3C TA.x, %s, %s;\n", src_name, src_name);
        shader_addline(buffer, "RSQ TA.x (NE), TA.x;\n");
        shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name,
                       src_name);
2621 2622 2623
    }
    else
    {
2624 2625 2626 2627 2628 2629 2630 2631 2632 2633
        const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);

        shader_addline(buffer, "DP3 TA.x, %s, %s;\n", src_name, src_name);
        /* Pass any non-zero value to RSQ if the input vector has a length of zero. The
         * RSQ result doesn't matter, as long as multiplying it by 0 returns 0.
         */
        shader_addline(buffer, "SGE TA.y, -TA.x, %s;\n", zero);
        shader_addline(buffer, "MAD TA.x, %s, TA.y, TA.x;\n", one);

        shader_addline(buffer, "RSQ TA.x, TA.x;\n");
2634
        /* dst.w = src[0].w * 1 / (src.x^2 + src.y^2 + src.z^2)^(1/2) according to msdn*/
2635
        shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name,
2636 2637
                    src_name);
    }
2638 2639
}

2640 2641
/* lrp: linear interpolation. Pixel shaders go through the generic
 * instruction mapping; for other shader types the result is computed as
 * src0 * (src1 - src2) + src2 with a SUB / MAD pair. */
static void shader_hw_lrp(const struct wined3d_shader_instruction *ins)
{
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
    char factor[50], first[50], second[50];
    char dst_param[50];

    if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
    {
        /* ARB_fragment_program conveniently provides an LRP instruction. */
        shader_hw_map2gl(ins);
        return;
    }

    shader_arb_get_dst_param(ins, &ins->dst[0], dst_param);
    shader_arb_get_src_param(ins, &ins->src[0], 0, factor);
    shader_arb_get_src_param(ins, &ins->src[1], 1, first);
    shader_arb_get_src_param(ins, &ins->src[2], 2, second);

    shader_addline(buffer, "SUB TA, %s, %s;\n", first, second);
    shader_addline(buffer, "MAD%s %s, %s, TA, %s;\n", shader_arb_get_modifier(ins),
            dst_param, factor, second);
}

2662
static void shader_hw_sincos(const struct wined3d_shader_instruction *ins)
2663
{
2664
    /* This instruction exists in ARB, but the d3d instruction takes two extra parameters which
2665
     * must contain fixed constants. So we need a separate function to filter those constants and
2666 2667
     * can't use map2gl
     */
2668
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2669 2670
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
    const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2671
    char dst_name[50];
2672 2673
    char src_name0[50], src_name1[50], src_name2[50];
    BOOL is_color;
2674

2675 2676 2677
    shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
    if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) {
        shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
2678 2679 2680 2681 2682 2683 2684 2685
        /* No modifiers are supported on SCS */
        shader_addline(buffer, "SCS %s, %s;\n", dst_name, src_name0);

        if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE)
        {
            shader_arb_get_register_name(ins, &dst->reg, src_name0, &is_color);
            shader_addline(buffer, "MOV_SAT %s, %s;\n", dst_name, src_name0);
        }
2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766
    } else if(priv->target_version >= NV2) {
        shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);

        /* Sincos writemask must be .x, .y or .xy */
        if(dst->write_mask & WINED3DSP_WRITEMASK_0)
            shader_addline(buffer, "COS%s %s.x, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
        if(dst->write_mask & WINED3DSP_WRITEMASK_1)
            shader_addline(buffer, "SIN%s %s.y, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
    } else {
        /* Approximate sine and cosine with a taylor series, as per math textbook. The application passes 8
         * helper constants(D3DSINCOSCONST1 and D3DSINCOSCONST2) in src1 and src2.
         *
         * sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ...
         * cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + ...
         *
         * The constants we get are:
         *
         *  +1   +1,     -1     -1     +1      +1      -1       -1
         *      ---- ,  ---- , ---- , ----- , ----- , ----- , ------
         *      1!*2    2!*4   3!*8   4!*16   5!*32   6!*64   7!*128
         *
         * If used with x^2, x^3, x^4 etc they calculate sin(x/2) and cos(x/2):
         *
         * (x/2)^2 = x^2 / 4
         * (x/2)^3 = x^3 / 8
         * (x/2)^4 = x^4 / 16
         * (x/2)^5 = x^5 / 32
         * etc
         *
         * To get the final result:
         * sin(x) = 2 * sin(x/2) * cos(x/2)
         * cos(x) = cos(x/2)^2 - sin(x/2)^2
         * (from sin(x+y) and cos(x+y) rules)
         *
         * As per MSDN, dst.z is undefined after the operation, and so is
         * dst.x and dst.y if they're masked out by the writemask. Ie
         * sincos dst.y, src1, c0, c1
         * returns the sine in dst.y. dst.x and dst.z are undefined, dst.w is not touched. The assembler
         * vsa.exe also stops with an error if the dest register is the same register as the source
         * register. This means we can use dest.xyz as temporary storage. The assembler vsa.exe output also
         * indicates that sincos consumes 8 instruction slots in vs_2_0(and, strangely, in vs_3_0).
         */
        shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);
        shader_arb_get_src_param(ins, &ins->src[2], 2, src_name2);
        shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);

        shader_addline(buffer, "MUL %s.x, %s, %s;\n", dst_name, src_name0, src_name0);  /* x ^ 2 */
        shader_addline(buffer, "MUL TA.y, %s.x, %s;\n", dst_name, src_name0);           /* x ^ 3 */
        shader_addline(buffer, "MUL %s.y, TA.y, %s;\n", dst_name, src_name0);           /* x ^ 4 */
        shader_addline(buffer, "MUL TA.z, %s.y, %s;\n", dst_name, src_name0);           /* x ^ 5 */
        shader_addline(buffer, "MUL %s.z, TA.z, %s;\n", dst_name, src_name0);           /* x ^ 6 */
        shader_addline(buffer, "MUL TA.w, %s.z, %s;\n", dst_name, src_name0);           /* x ^ 7 */

        /* sin(x/2)
         *
         * Unfortunately we don't get the constants in a DP4-capable form. Is there a way to
         * properly merge that with MULs in the code above?
         * The swizzles .yz and xw however fit into the .yzxw swizzle added to ps_2_0. Maybe
         * we can merge the sine and cosine MAD rows to calculate them together.
         */
        shader_addline(buffer, "MUL TA.x, %s, %s.w;\n", src_name0, src_name2); /* x^1, +1/(1!*2) */
        shader_addline(buffer, "MAD TA.x, TA.y, %s.x, TA.x;\n", src_name2); /* -1/(3!*8) */
        shader_addline(buffer, "MAD TA.x, TA.z, %s.w, TA.x;\n", src_name1); /* +1/(5!*32) */
        shader_addline(buffer, "MAD TA.x, TA.w, %s.x, TA.x;\n", src_name1); /* -1/(7!*128) */

        /* cos(x/2) */
        shader_addline(buffer, "MAD TA.y, %s.x, %s.y, %s.z;\n", dst_name, src_name2, src_name2); /* -1/(2!*4), +1.0 */
        shader_addline(buffer, "MAD TA.y, %s.y, %s.z, TA.y;\n", dst_name, src_name1); /* +1/(4!*16) */
        shader_addline(buffer, "MAD TA.y, %s.z, %s.y, TA.y;\n", dst_name, src_name1); /* -1/(6!*64) */

        if(dst->write_mask & WINED3DSP_WRITEMASK_0) {
            /* cos x */
            shader_addline(buffer, "MUL TA.z, TA.y, TA.y;\n");
            shader_addline(buffer, "MAD %s.x, -TA.x, TA.x, TA.z;\n", dst_name);
        }
        if(dst->write_mask & WINED3DSP_WRITEMASK_1) {
            /* sin x */
            shader_addline(buffer, "MUL %s.y, TA.x, TA.y;\n", dst_name);
            shader_addline(buffer, "ADD %s.y, %s.y, %s.y;\n", dst_name, dst_name, dst_name);
        }
    }
2767 2768
}

2769 2770
static void shader_hw_sgn(const struct wined3d_shader_instruction *ins)
{
2771
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2772 2773 2774 2775
    char dst_name[50];
    char src_name[50];
    struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;

2776 2777 2778
    shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
    shader_arb_get_src_param(ins, &ins->src[0], 0, src_name);

2779
    /* SGN is only valid in vertex shaders */
2780 2781
    if(ctx->target_version >= NV2) {
        shader_addline(buffer, "SSG%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name);
2782 2783 2784 2785 2786 2787 2788 2789 2790
        return;
    }

    /* If SRC > 0.0, -SRC < SRC = TRUE, otherwise false.
     * if SRC < 0.0,  SRC < -SRC = TRUE. If neither is true, src = 0.0
     */
    if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) {
        shader_addline(buffer, "SLT %s, -%s, %s;\n", dst_name, src_name, src_name);
    } else {
Stefan Dösinger's avatar
Stefan Dösinger committed
2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808
        /* src contains TA? Write to the dest first. This won't overwrite our destination.
         * Then use TA, and calculate the final result
         *
         * Not reading from TA? Store the first result in TA to avoid overwriting the
         * destination if src reg = dst reg
         */
        if(strstr(src_name, "TA"))
        {
            shader_addline(buffer, "SLT %s,  %s, -%s;\n", dst_name, src_name, src_name);
            shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name);
            shader_addline(buffer, "ADD %s, %s, -TA;\n", dst_name, dst_name);
        }
        else
        {
            shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name);
            shader_addline(buffer, "SLT %s,  %s, -%s;\n", dst_name, src_name, src_name);
            shader_addline(buffer, "ADD %s, TA, -%s;\n", dst_name, dst_name);
        }
2809 2810 2811
    }
}

2812 2813
/* dsy: derivative in the y direction. The DDY result is written to the
 * destination and then multiplied by ycorrection.y, with the instruction
 * modifier applied on that final multiply. */
static void shader_hw_dsy(const struct wined3d_shader_instruction *ins)
{
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
    char dst_param[50], reg_name[50], src_str[50];
    BOOL is_color;

    shader_arb_get_dst_param(ins, &ins->dst[0], dst_param);
    shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
    shader_arb_get_register_name(ins, &ins->dst[0].reg, reg_name, &is_color);

    shader_addline(buffer, "DDY %s, %s;\n", dst_param, src_str);
    /* The second operand is the bare register name rather than the masked destination parameter. */
    shader_addline(buffer, "MUL%s %s, %s, ycorrection.y;\n", shader_arb_get_modifier(ins), dst_param, reg_name);
}

2828 2829
static void shader_hw_pow(const struct wined3d_shader_instruction *ins)
{
2830
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2831 2832 2833
    char src0[50], src1[50], dst[50];
    struct wined3d_shader_src_param src0_copy = ins->src[0];
    BOOL need_abs = FALSE;
2834 2835
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
    const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
2836 2837 2838 2839 2840 2841 2842 2843 2844 2845

    /* POW operates on the absolute value of the input */
    src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs);

    shader_arb_get_dst_param(ins, &ins->dst[0], dst);
    shader_arb_get_src_param(ins, &src0_copy, 0, src0);
    shader_arb_get_src_param(ins, &ins->src[1], 1, src1);

    if (need_abs)
        shader_addline(buffer, "ABS TA.x, %s;\n", src0);
2846 2847 2848 2849 2850 2851 2852 2853
    else
        shader_addline(buffer, "MOV TA.x, %s;\n", src0);

    if (priv->target_version >= NV2)
    {
        shader_addline(buffer, "MOVC TA.y, %s;\n", src1);
        shader_addline(buffer, "POW%s %s, TA.x, TA.y;\n", shader_arb_get_modifier(ins), dst);
        shader_addline(buffer, "MOV %s (EQ.y), %s;\n", dst, one);
2854 2855 2856
    }
    else
    {
2857 2858 2859 2860 2861 2862 2863 2864 2865 2866
        const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
        const char *flt_eps = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_EPS);

        shader_addline(buffer, "ABS TA.y, %s;\n", src1);
        shader_addline(buffer, "SGE TA.y, -TA.y, %s;\n", zero);
        /* Possibly add flt_eps to avoid getting float special values */
        shader_addline(buffer, "MAD TA.z, TA.y, %s, %s;\n", flt_eps, src1);
        shader_addline(buffer, "POW%s TA.x, TA.x, TA.z;\n", shader_arb_get_modifier(ins));
        shader_addline(buffer, "MAD TA.x, -TA.x, TA.y, TA.x;\n");
        shader_addline(buffer, "MAD %s, TA.y, %s, TA.x;\n", dst, one);
2867 2868 2869
    }
}

2870 2871
/* Handler for the D3D "loop" instruction.
 *
 * Pixel shaders emit a plain LOOP statement. Vertex shaders build the loop
 * from ARLC / BRA and a pair of labels numbered after the control frame at
 * the head of priv->control_frames; nested loops save aL with PUSHA first. */
static void shader_hw_loop(const struct wined3d_shader_instruction *ins)
{
    struct wined3d_string_buffer *out = ins->ctx->buffer;
    struct shader_arb_ctx_priv *ctx_priv;
    struct control_frame *frame;
    char loop_ctrl[50];

    /* src0 is aL */
    shader_arb_get_src_param(ins, &ins->src[1], 0, loop_ctrl);

    if (!shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type))
    {
        shader_addline(out, "LOOP %s;\n", loop_ctrl);
        return;
    }

    ctx_priv = ins->ctx->backend_data;
    frame = LIST_ENTRY(list_head(&ctx_priv->control_frames), struct control_frame, entry);

    if (ctx_priv->loop_depth > 1)
        shader_addline(out, "PUSHA aL;\n");

    /* The constant loader makes sure to load -1 into iX.w */
    shader_addline(out, "ARLC aL, %s.xywz;\n", loop_ctrl);
    /* Skip the body entirely when the condition code says LE. */
    shader_addline(out, "BRA loop_%u_end (LE.x);\n", frame->no.loop);
    shader_addline(out, "loop_%u_start:\n", frame->no.loop);
}

/* Handler for the D3D "rep" instruction.
 *
 * Pixel shaders emit a plain REP statement. Vertex shaders emit the same
 * ARLC / BRA / label sequence as shader_hw_loop(), with src0 as the
 * repeat-control operand. */
static void shader_hw_rep(const struct wined3d_shader_instruction *ins)
{
    struct wined3d_string_buffer *out = ins->ctx->buffer;
    struct shader_arb_ctx_priv *ctx_priv;
    struct control_frame *frame;
    char rep_ctrl[50];

    shader_arb_get_src_param(ins, &ins->src[0], 0, rep_ctrl);

    if (!shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type))
    {
        shader_addline(out, "REP %s;\n", rep_ctrl);
        return;
    }

    ctx_priv = ins->ctx->backend_data;
    frame = LIST_ENTRY(list_head(&ctx_priv->control_frames), struct control_frame, entry);

    /* A nested loop has to preserve the outer loop's aL. */
    if (ctx_priv->loop_depth > 1)
        shader_addline(out, "PUSHA aL;\n");

    /* The constant loader makes sure to load -1 into iX.w */
    shader_addline(out, "ARLC aL, %s.xywz;\n", rep_ctrl);
    shader_addline(out, "BRA loop_%u_end (LE.x);\n", frame->no.loop);
    shader_addline(out, "loop_%u_start:\n", frame->no.loop);
}

/* Handler for the D3D "endloop" instruction.
 *
 * Pixel shaders emit ENDLOOP. Vertex shaders update aL with ARAC, branch
 * back to the start label while the condition code says GT, place the end
 * label, and restore the outer aL when the loop was nested. */
static void shader_hw_endloop(const struct wined3d_shader_instruction *ins)
{
    struct wined3d_string_buffer *out = ins->ctx->buffer;
    struct shader_arb_ctx_priv *ctx_priv;
    struct control_frame *frame;

    if (!shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type))
    {
        shader_addline(out, "ENDLOOP;\n");
        return;
    }

    ctx_priv = ins->ctx->backend_data;
    frame = LIST_ENTRY(list_head(&ctx_priv->control_frames), struct control_frame, entry);

    shader_addline(out, "ARAC aL.xy, aL;\n");
    shader_addline(out, "BRA loop_%u_start (GT.x);\n", frame->no.loop);
    shader_addline(out, "loop_%u_end:\n", frame->no.loop);

    /* Counterpart of the PUSHA emitted when the loop was opened. */
    if (ctx_priv->loop_depth > 1)
        shader_addline(out, "POPA aL;\n");
}

/* Handler for the D3D "endrep" instruction.
 *
 * Pixel shaders emit ENDREP. Vertex shaders emit the same ARAC / BRA /
 * label / POPA sequence as shader_hw_endloop(). */
static void shader_hw_endrep(const struct wined3d_shader_instruction *ins)
{
    struct wined3d_string_buffer *out = ins->ctx->buffer;
    struct shader_arb_ctx_priv *ctx_priv;
    struct control_frame *frame;

    if (!shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type))
    {
        shader_addline(out, "ENDREP;\n");
        return;
    }

    ctx_priv = ins->ctx->backend_data;
    frame = LIST_ENTRY(list_head(&ctx_priv->control_frames), struct control_frame, entry);

    shader_addline(out, "ARAC aL.xy, aL;\n");
    shader_addline(out, "BRA loop_%u_start (GT.x);\n", frame->no.loop);
    shader_addline(out, "loop_%u_end:\n", frame->no.loop);

    /* Counterpart of the PUSHA emitted when the block was opened. */
    if (ctx_priv->loop_depth > 1)
        shader_addline(out, "POPA aL;\n");
}

2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983
/* Returns the first LOOP or REP frame found when walking
 * priv->control_frames from its head. Logs an ERR and returns NULL when the
 * list holds no such frame. */
static const struct control_frame *find_last_loop(const struct shader_arb_ctx_priv *priv)
{
    struct control_frame *frame;

    LIST_FOR_EACH_ENTRY(frame, &priv->control_frames, struct control_frame, entry)
    {
        /* Skip anything that is not a loop construct. */
        if (frame->type != LOOP && frame->type != REP)
            continue;

        return frame;
    }

    ERR("Could not find loop for break\n");
    return NULL;
}

static void shader_hw_break(const struct wined3d_shader_instruction *ins)
{
2984
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2985 2986 2987 2988 2989
    const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data);
    BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);

    if(vshader)
    {
2990
        shader_addline(buffer, "BRA loop_%u_end;\n", control_frame->no.loop);
2991 2992 2993 2994 2995 2996 2997
    }
    else
    {
        shader_addline(buffer, "BRK;\n");
    }
}

2998
static const char *get_compare(enum wined3d_shader_rel_op op)
2999
{
3000
    switch (op)
3001
    {
3002 3003 3004 3005 3006 3007
        case WINED3D_SHADER_REL_OP_GT: return "GT";
        case WINED3D_SHADER_REL_OP_EQ: return "EQ";
        case WINED3D_SHADER_REL_OP_GE: return "GE";
        case WINED3D_SHADER_REL_OP_LT: return "LT";
        case WINED3D_SHADER_REL_OP_NE: return "NE";
        case WINED3D_SHADER_REL_OP_LE: return "LE";
3008
        default:
3009
            FIXME("Unrecognized operator %#x.\n", op);
3010 3011 3012 3013
            return "(\?\?)";
    }
}

3014
static enum wined3d_shader_rel_op invert_compare(enum wined3d_shader_rel_op op)
3015
{
3016
    switch (op)
3017
    {
3018 3019 3020 3021 3022 3023
        case WINED3D_SHADER_REL_OP_GT: return WINED3D_SHADER_REL_OP_LE;
        case WINED3D_SHADER_REL_OP_EQ: return WINED3D_SHADER_REL_OP_NE;
        case WINED3D_SHADER_REL_OP_GE: return WINED3D_SHADER_REL_OP_LT;
        case WINED3D_SHADER_REL_OP_LT: return WINED3D_SHADER_REL_OP_GE;
        case WINED3D_SHADER_REL_OP_NE: return WINED3D_SHADER_REL_OP_EQ;
        case WINED3D_SHADER_REL_OP_LE: return WINED3D_SHADER_REL_OP_GT;
3024
        default:
3025
            FIXME("Unrecognized operator %#x.\n", op);
3026 3027 3028 3029
            return -1;
    }
}

3030 3031
static void shader_hw_breakc(const struct wined3d_shader_instruction *ins)
{
3032
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3033
    BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
3034 3035 3036
    const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data);
    char src_name0[50];
    char src_name1[50];
3037
    const char *comp = get_compare(ins->flags);
3038 3039 3040 3041 3042 3043 3044 3045 3046 3047

    shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
    shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);

    if(vshader)
    {
        /* SUBC CC, src0, src1" works only in pixel shaders, so use TA to throw
         * away the subtraction result
         */
        shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
3048
        shader_addline(buffer, "BRA loop_%u_end (%s.x);\n", control_frame->no.loop, comp);
3049 3050 3051
    }
    else
    {
3052
        shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
3053 3054 3055 3056
        shader_addline(buffer, "BRK (%s.x);\n", comp);
    }
}

3057 3058
static void shader_hw_ifc(const struct wined3d_shader_instruction *ins)
{
3059
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
    struct list *e = list_head(&priv->control_frames);
    struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
    const char *comp;
    char src_name0[50];
    char src_name1[50];
    BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);

    shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
    shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);

    if(vshader)
    {
3073 3074
        /* Invert the flag. We jump to the else label if the condition is NOT true */
        comp = get_compare(invert_compare(ins->flags));
3075
        shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
3076
        shader_addline(buffer, "BRA ifc_%u_else (%s.x);\n", control_frame->no.ifc, comp);
3077 3078 3079
    }
    else
    {
3080
        comp = get_compare(ins->flags);
3081
        shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
3082 3083 3084 3085 3086 3087
        shader_addline(buffer, "IF %s.x;\n", comp);
    }
}

static void shader_hw_else(const struct wined3d_shader_instruction *ins)
{
3088
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3089 3090 3091 3092 3093 3094 3095
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
    struct list *e = list_head(&priv->control_frames);
    struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
    BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);

    if(vshader)
    {
3096 3097
        shader_addline(buffer, "BRA ifc_%u_endif;\n", control_frame->no.ifc);
        shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc);
3098 3099 3100 3101 3102 3103 3104 3105 3106 3107
        control_frame->had_else = TRUE;
    }
    else
    {
        shader_addline(buffer, "ELSE;\n");
    }
}

static void shader_hw_endif(const struct wined3d_shader_instruction *ins)
{
3108
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3109 3110 3111 3112 3113 3114 3115 3116 3117
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
    struct list *e = list_head(&priv->control_frames);
    struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
    BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);

    if(vshader)
    {
        if(control_frame->had_else)
        {
3118
            shader_addline(buffer, "ifc_%u_endif:\n", control_frame->no.ifc);
3119 3120 3121 3122
        }
        else
        {
            shader_addline(buffer, "#No else branch. else is endif\n");
3123
            shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc);
3124 3125 3126 3127 3128 3129 3130 3131
        }
    }
    else
    {
        shader_addline(buffer, "ENDIF;\n");
    }
}

3132 3133
static void shader_hw_texldd(const struct wined3d_shader_instruction *ins)
{
3134
    DWORD sampler_idx = ins->src[1].reg.idx[0].offset;
3135 3136
    char reg_dest[40];
    char reg_src[3][40];
3137
    WORD flags = TEX_DERIV;
3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149

    shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest);
    shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src[0]);
    shader_arb_get_src_param(ins, &ins->src[2], 1, reg_src[1]);
    shader_arb_get_src_param(ins, &ins->src[3], 2, reg_src[2]);

    if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ;
    if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS;

    shader_hw_sample(ins, sampler_idx, reg_dest, reg_src[0], flags, reg_src[1], reg_src[2]);
}

3150 3151
static void shader_hw_texldl(const struct wined3d_shader_instruction *ins)
{
3152
    DWORD sampler_idx = ins->src[1].reg.idx[0].offset;
3153 3154
    char reg_dest[40];
    char reg_coord[40];
3155
    WORD flags = TEX_LOD;
3156 3157 3158 3159 3160 3161 3162 3163 3164 3165

    shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest);
    shader_arb_get_src_param(ins, &ins->src[0], 0, reg_coord);

    if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ;
    if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS;

    shader_hw_sample(ins, sampler_idx, reg_dest, reg_coord, flags, NULL, NULL);
}

3166 3167
static void shader_hw_label(const struct wined3d_shader_instruction *ins)
{
3168
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3169 3170
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;

3171
    priv->in_main_func = FALSE;
3172 3173 3174 3175 3176
    /* Call instructions activate the NV extensions, not labels and rets. If there is an uncalled
     * subroutine, don't generate a label that will make GL complain
     */
    if(priv->target_version == ARB) return;

3177
    shader_addline(buffer, "l%u:\n", ins->src[0].reg.idx[0].offset);
3178 3179
}

3180 3181
static void vshader_add_footer(struct shader_arb_ctx_priv *priv_ctx,
        const struct arb_vshader_private *shader_data, const struct arb_vs_compile_args *args,
3182
        const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info,
3183
        struct wined3d_string_buffer *buffer)
3184 3185 3186 3187 3188 3189 3190 3191
{
    unsigned int i;

    /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used
     * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE),
     * the fog frag coord is thrown away. If the fog frag coord is used, but not written by
     * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0)
     */
3192
    if (args->super.fog_src == VS_FOG_Z)
3193
    {
3194
        shader_addline(buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212
    }
    else
    {
        if (!reg_maps->fog)
        {
            /* posFixup.x is always 1.0, so we can safely use it */
            shader_addline(buffer, "ADD result.fogcoord, posFixup.x, -posFixup.x;\n");
        }
        else
        {
            /* Clamp fogcoord */
            const char *zero = arb_get_helper_value(reg_maps->shader_version.type, ARB_ZERO);
            const char *one = arb_get_helper_value(reg_maps->shader_version.type, ARB_ONE);

            shader_addline(buffer, "MIN TMP_FOGCOORD.x, TMP_FOGCOORD.x, %s;\n", one);
            shader_addline(buffer, "MAX result.fogcoord.x, TMP_FOGCOORD.x, %s;\n", zero);
        }
    }
3213

3214 3215
    /* Clipplanes are always stored without y inversion */
    if (use_nv_clip(gl_info) && priv_ctx->target_version >= NV2)
3216
    {
3217
        if (args->super.clip_enabled)
3218
        {
3219
            for (i = 0; i < priv_ctx->vs_clipplanes; i++)
3220 3221 3222
            {
                shader_addline(buffer, "DP4 result.clip[%u].x, TMP_OUT, state.clip[%u].plane;\n", i, i);
            }
3223 3224
        }
    }
3225
    else if (args->clip.boolclip.clip_texcoord)
3226
    {
3227
        static const char component[4] = {'x', 'y', 'z', 'w'};
3228
        unsigned int cur_clip = 0;
3229
        const char *zero = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ZERO);
3230

3231
        for (i = 0; i < gl_info->limits.user_clip_distances; ++i)
3232
        {
3233
            if (args->clip.boolclip.clipplane_mask & (1u << i))
3234 3235 3236 3237 3238
            {
                shader_addline(buffer, "DP4 TA.%c, TMP_OUT, state.clip[%u].plane;\n",
                               component[cur_clip++], i);
            }
        }
3239
        switch (cur_clip)
3240 3241
        {
            case 0:
3242
                shader_addline(buffer, "MOV TA, %s;\n", zero);
3243 3244
                break;
            case 1:
3245
                shader_addline(buffer, "MOV TA.yzw, %s;\n", zero);
3246 3247
                break;
            case 2:
3248
                shader_addline(buffer, "MOV TA.zw, %s;\n", zero);
3249 3250
                break;
            case 3:
3251
                shader_addline(buffer, "MOV TA.w, %s;\n", zero);
3252 3253 3254
                break;
        }
        shader_addline(buffer, "MOV result.texcoord[%u], TA;\n",
3255
                       args->clip.boolclip.clip_texcoord - 1);
3256 3257
    }

3258 3259 3260 3261 3262 3263 3264
    /* Write the final position.
     *
     * OpenGL coordinates specify the center of the pixel while d3d coords specify
     * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
     * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
     * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
     */
3265
    if (!gl_info->supported[ARB_CLIP_CONTROL])
3266
    {
3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282
        shader_addline(buffer, "MUL TA, posFixup, TMP_OUT.w;\n");
        shader_addline(buffer, "ADD TMP_OUT.x, TMP_OUT.x, TA.z;\n");
        shader_addline(buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, TA.w;\n");

        /* Z coord [0;1]->[-1;1] mapping, see comment in
         * get_projection_matrix() in utils.c. */
        if (need_helper_const(shader_data, reg_maps, gl_info))
        {
            const char *two = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_TWO);
            shader_addline(buffer, "MAD TMP_OUT.z, TMP_OUT.z, %s, -TMP_OUT.w;\n", two);
        }
        else
        {
            shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, TMP_OUT.z;\n");
            shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, -TMP_OUT.w;\n");
        }
3283 3284 3285 3286 3287 3288 3289
    }

    shader_addline(buffer, "MOV result.position, TMP_OUT;\n");

    priv_ctx->footer_written = TRUE;
}

3290 3291
static void shader_hw_ret(const struct wined3d_shader_instruction *ins)
{
3292
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3293
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
3294
    const struct wined3d_shader *shader = ins->ctx->shader;
3295
    BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
3296 3297 3298

    if(priv->target_version == ARB) return;

3299 3300
    if(vshader)
    {
3301
        if (priv->in_main_func) vshader_add_footer(priv, shader->backend_data,
3302
                priv->cur_vs_args, ins->ctx->reg_maps, priv->gl_info, buffer);
3303 3304
    }

3305 3306 3307 3308 3309
    shader_addline(buffer, "RET;\n");
}

static void shader_hw_call(const struct wined3d_shader_instruction *ins)
{
3310
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3311
    shader_addline(buffer, "CAL l%u;\n", ins->src[0].reg.idx[0].offset);
3312 3313
}

3314 3315
/* Uploads an ARB program string to the program currently bound to target.
 *
 * The source is dumped line by line when tracing is enabled. Program errors
 * are only queried when the FIXME class of the d3d_shader channel is on; in
 * that case a reported error position makes the function log the error
 * string together with the source and return FALSE. In every other case
 * TRUE is returned. With d3d_perf warnings enabled, a program exceeding the
 * native resource limits is reported as well.
 *
 * NOTE(review): presumably the caller has a GL context active and the
 * program object bound - confirm against the callers. */
static BOOL shader_arb_compile(const struct wined3d_gl_info *gl_info, GLenum target, const char *src)
{
    const char *cursor, *line_start;
    GLint under_native_limits, error_pos;

    if (TRACE_ON(d3d_shader))
    {
        for (cursor = src; (line_start = get_line(&cursor));)
        {
            TRACE_(d3d_shader)("    %.*s", (int)(cursor - line_start), line_start);
        }
    }

    GL_EXTCALL(glProgramStringARB(target, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(src), src));
    checkGLcall("glProgramStringARB()");

    if (FIXME_ON(d3d_shader))
    {
        /* -1 means the program string was accepted. */
        gl_info->gl_ops.gl.p_glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &error_pos);
        if (error_pos != -1)
        {
            FIXME_(d3d_shader)("Program error at position %d: %s\n\n", error_pos,
                    debugstr_a((const char *)gl_info->gl_ops.gl.p_glGetString(GL_PROGRAM_ERROR_STRING_ARB)));

            for (cursor = src; (line_start = get_line(&cursor));)
            {
                FIXME_(d3d_shader)("    %.*s", (int)(cursor - line_start), line_start);
            }
            FIXME_(d3d_shader)("\n");

            return FALSE;
        }
    }

    if (WARN_ON(d3d_perf))
    {
        GL_EXTCALL(glGetProgramivARB(target, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &under_native_limits));
        checkGLcall("glGetProgramivARB()");
        if (!under_native_limits)
        {
            WARN_(d3d_perf)("Program exceeds native resource limits.\n");
        }
    }

    return TRUE;
}

3354
/* Appends the sRGB write correction for the colour held in fragcolor.
 * See GLX_EXT_framebuffer_sRGB.
 *
 * With condcode set, the correction is applied in place using condition
 * code predication and only tmp1 is used (as a dummy destination).
 * Otherwise both branches of the transfer function are computed into
 * tmp1 / tmp2 and blended with the 0.0 / 1.0 masks in tmp3 / tmp4.
 * Only the xyz components of fragcolor are written. */
static void arbfp_add_sRGB_correction(struct wined3d_string_buffer *buffer, const char *fragcolor,
        const char *tmp1, const char *tmp2, const char *tmp3, const char *tmp4, BOOL condcode)
{
    static const char channel[3] = {'x', 'y', 'z'};
    unsigned int c;

    if (condcode)
    {
        /* Sigh. MOVC CC doesn't work, so use one of the temps as dummy dest */
        /* NOTE(review): the comparison reads only fragcolor.x, so all three
         * channels appear to be predicated on the first one - confirm this
         * is intended. */
        shader_addline(buffer, "SUBC %s, %s.x, srgb_consts1.x;\n", tmp1, fragcolor);

        /* Calculate the > 0.0031308 case */
        for (c = 0; c < sizeof(channel) / sizeof(*channel); ++c)
        {
            shader_addline(buffer, "POW %s.%c (GE), %s.%c, srgb_consts0.x;\n",
                    fragcolor, channel[c], fragcolor, channel[c]);
        }
        shader_addline(buffer, "MUL %s.xyz (GE), %s, srgb_consts0.y;\n", fragcolor, fragcolor);
        shader_addline(buffer, "SUB %s.xyz (GE), %s, srgb_consts0.z;\n", fragcolor, fragcolor);

        /* Calculate the < case */
        shader_addline(buffer, "MUL %s.xyz (LT), srgb_consts0.w, %s;\n", fragcolor, fragcolor);

        /* [0.0;1.0] clamping. Not needed, this is done implicitly */
        return;
    }

    /* Calculate the > 0.0031308 case */
    for (c = 0; c < sizeof(channel) / sizeof(*channel); ++c)
    {
        shader_addline(buffer, "POW %s.%c, %s.%c, srgb_consts0.x;\n",
                tmp1, channel[c], fragcolor, channel[c]);
    }
    shader_addline(buffer, "MUL %s, %s, srgb_consts0.y;\n", tmp1, tmp1);
    shader_addline(buffer, "SUB %s, %s, srgb_consts0.z;\n", tmp1, tmp1);

    /* Calculate the < case */
    shader_addline(buffer, "MUL %s, srgb_consts0.w, %s;\n", tmp2, fragcolor);

    /* Get 1.0 / 0.0 masks for > 0.0031308 and < 0.0031308 */
    shader_addline(buffer, "SLT %s, srgb_consts1.x, %s;\n", tmp3, fragcolor);
    shader_addline(buffer, "SGE %s, srgb_consts1.x, %s;\n", tmp4, fragcolor);

    /* Store the components > 0.0031308 in the destination */
    shader_addline(buffer, "MUL %s.xyz, %s, %s;\n", fragcolor, tmp1, tmp3);
    /* Add the components that are < 0.0031308 */
    shader_addline(buffer, "MAD %s.xyz, %s, %s, %s;\n", fragcolor, tmp2, tmp4, fragcolor);

    /* Move everything into result.color at once. Nvidia hardware cannot handle partial
     * result.color writes(.rgb first, then .a), or handle overwriting already written
     * components. The assembler uses a temporary register in this case, which is usually
     * not allocated from one of our registers that were used earlier.
     */

    /* [0.0;1.0] clamping. Not needed, this is done implicitly */
}

3398
/* Looks up the local integer constant with index idx in shader->constantsI.
 * Returns a pointer to its value, or NULL if the shader defines no constant
 * with that index. */
static const DWORD *find_loop_control_values(const struct wined3d_shader *shader, DWORD idx)
{
    const struct wined3d_shader_lconst *lconst;

    LIST_FOR_EACH_ENTRY(lconst, &shader->constantsI, struct wined3d_shader_lconst, entry)
    {
        if (lconst->idx != idx)
            continue;

        return lconst->value;
    }

    return NULL;
}

3412
static void init_ps_input(const struct wined3d_shader *shader,
3413
        const struct arb_ps_compile_args *args, struct shader_arb_ctx_priv *priv)
3414
{
3415
    static const char * const texcoords[8] =
3416 3417 3418 3419 3420
    {
        "fragment.texcoord[0]", "fragment.texcoord[1]", "fragment.texcoord[2]", "fragment.texcoord[3]",
        "fragment.texcoord[4]", "fragment.texcoord[5]", "fragment.texcoord[6]", "fragment.texcoord[7]"
    };
    unsigned int i;
3421
    const struct wined3d_shader_signature_element *input;
3422 3423 3424
    const char *semantic_name;
    DWORD semantic_idx;

3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436
    if (args->super.vp_mode == WINED3D_VP_MODE_SHADER)
    {
        /* That one is easy. The vertex shaders provide v0-v7 in
         * fragment.texcoord and v8 and v9 in fragment.color. */
        for (i = 0; i < 8; ++i)
        {
            priv->ps_input[i] = texcoords[i];
        }
        priv->ps_input[8] = "fragment.color.primary";
        priv->ps_input[9] = "fragment.color.secondary";
        return;
    }
3437

3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451
    /* The fragment shader has to collect the varyings on its own. In any case
     * properly load color0 and color1. In the case of pre-transformed
     * vertices also load texture coordinates. Set other attributes to 0.0.
     *
     * For fixed-function this behavior is correct, according to the tests.
     * For pre-transformed we'd either need a replacement shader that can load
     * other attributes like BINORMAL, or load the texture coordinate
     * attribute pointers to match the fragment shader signature. */
    for (i = 0; i < shader->input_signature.element_count; ++i)
    {
        input = &shader->input_signature.elements[i];
        if (!(semantic_name = input->semantic_name))
            continue;
        semantic_idx = input->semantic_idx;
3452

3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483
        if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_COLOR))
        {
            if (!semantic_idx)
                priv->ps_input[input->register_idx] = "fragment.color.primary";
            else if (semantic_idx == 1)
                priv->ps_input[input->register_idx] = "fragment.color.secondary";
            else
                priv->ps_input[input->register_idx] = "0.0";
        }
        else if (args->super.vp_mode == WINED3D_VP_MODE_FF)
        {
            priv->ps_input[input->register_idx] = "0.0";
        }
        else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_TEXCOORD))
        {
            if (semantic_idx < 8)
                priv->ps_input[input->register_idx] = texcoords[semantic_idx];
            else
                priv->ps_input[input->register_idx] = "0.0";
        }
        else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_FOG))
        {
            if (!semantic_idx)
                priv->ps_input[input->register_idx] = "fragment.fogcoord";
            else
                priv->ps_input[input->register_idx] = "0.0";
        }
        else
        {
            priv->ps_input[input->register_idx] = "0.0";
        }
3484

3485 3486
        TRACE("v%u, semantic %s%u is %s\n", input->register_idx,
                semantic_name, semantic_idx, priv->ps_input[input->register_idx]);
3487 3488 3489
    }
}

3490
/* Appends linear fog blending for the colour held in fragcolor, using tmp.x
 * as scratch. Only the rgb components of fragcolor are modified. */
static void arbfp_add_linear_fog(struct wined3d_string_buffer *buffer, const char *fragcolor, const char *tmp)
{
    /* tmp.x = fog.params.z - fogcoord */
    shader_addline(buffer, "SUB %s.x, state.fog.params.z, fragment.fogcoord.x;\n", tmp);

    /* tmp.x = saturate(tmp.x * fog.params.w) */
    shader_addline(buffer, "MUL_SAT %s.x, %s.x, state.fog.params.w;\n", tmp, tmp);

    /* Blend between the fragment colour and the fog colour by tmp.x. */
    shader_addline(buffer, "LRP %s.rgb, %s.x, %s, state.fog.color;\n", fragcolor, tmp, fragcolor);
}

3498
/* Context activation is done by the caller. */
3499
static GLuint shader_arb_generate_pshader(const struct wined3d_shader *shader,
3500
        const struct wined3d_gl_info *gl_info, struct wined3d_string_buffer *buffer,
3501
        const struct arb_ps_compile_args *args, struct arb_ps_compiled_shader *compiled)
3502
{
3503
    const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
3504
    GLuint retval;
3505
    char fragcolor[16];
3506
    DWORD next_local = 0;
3507
    struct shader_arb_ctx_priv priv_ctx;
3508
    BOOL dcl_td = FALSE;
3509
    BOOL want_nv_prog = FALSE;
3510
    struct arb_pshader_private *shader_priv = shader->backend_data;
3511
    DWORD map;
3512
    BOOL custom_linear_fog = FALSE;
3513

3514
    char srgbtmp[4][4];
3515
    char ftoa_tmp[17];
3516 3517
    unsigned int i, found = 0;

3518 3519 3520
    for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
    {
        if (!(map & 1)
3521
                || (shader->u.ps.color0_mov && i == shader->u.ps.color0_reg)
3522
                || (reg_maps->shader_version.major < 2 && !i))
3523
            continue;
3524

3525 3526 3527
        sprintf(srgbtmp[found], "R%u", i);
        ++found;
        if (found == 4) break;
3528 3529 3530 3531 3532 3533 3534
    }

    switch(found) {
        case 0:
            sprintf(srgbtmp[0], "TA");
            sprintf(srgbtmp[1], "TB");
            sprintf(srgbtmp[2], "TC");
3535 3536
            sprintf(srgbtmp[3], "TD");
            dcl_td = TRUE;
3537 3538 3539 3540
            break;
        case 1:
            sprintf(srgbtmp[1], "TA");
            sprintf(srgbtmp[2], "TB");
3541
            sprintf(srgbtmp[3], "TC");
3542 3543 3544
            break;
        case 2:
            sprintf(srgbtmp[2], "TA");
3545 3546 3547 3548
            sprintf(srgbtmp[3], "TB");
            break;
        case 3:
            sprintf(srgbtmp[3], "TA");
3549
            break;
3550 3551
        case 4:
            break;
3552
    }
3553 3554

    /*  Create the hw ARB shader */
3555
    memset(&priv_ctx, 0, sizeof(priv_ctx));
3556
    priv_ctx.gl_info = gl_info;
3557
    priv_ctx.cur_ps_args = args;
3558
    priv_ctx.compiled_fprog = compiled;
3559
    priv_ctx.cur_np2fixup_info = &compiled->np2fixup_info;
3560
    init_ps_input(shader, args, &priv_ctx);
3561
    list_init(&priv_ctx.control_frames);
3562
    priv_ctx.ps_post_process = args->super.srgb_correction;
3563

3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574
    /* Avoid enabling NV_fragment_program* if we do not need it.
     *
     * Enabling GL_NV_fragment_program_option causes the driver to occupy a temporary register,
     * and it slows down the shader execution noticeably(about 5%). Usually our instruction emulation
     * is faster than what we gain from using higher native instructions. There are some things though
     * that cannot be emulated. In that case enable the extensions.
     * If the extension is enabled, instruction handlers that support both ways will use it.
     *
     * Testing shows no performance difference between OPTION NV_fragment_program2 and NV_fragment_program.
     * So enable the best we can get.
     */
3575
    if(reg_maps->usesdsx || reg_maps->usesdsy || reg_maps->loop_depth > 0 || reg_maps->usestexldd ||
3576
       reg_maps->usestexldl || reg_maps->usesfacing || reg_maps->usesifc || reg_maps->usescall)
3577 3578 3579 3580
    {
        want_nv_prog = TRUE;
    }

3581
    shader_addline(buffer, "!!ARBfp1.0\n");
3582 3583
    if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM2])
    {
3584 3585
        shader_addline(buffer, "OPTION NV_fragment_program2;\n");
        priv_ctx.target_version = NV3;
3586 3587 3588
    }
    else if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION])
    {
3589 3590 3591
        shader_addline(buffer, "OPTION NV_fragment_program;\n");
        priv_ctx.target_version = NV2;
    } else {
3592 3593 3594 3595 3596 3597 3598 3599
        if(want_nv_prog)
        {
            /* This is an error - either we're advertising the wrong shader version, or aren't enforcing some
             * limits properly
             */
            ERR("The shader requires instructions that are not available in plain GL_ARB_fragment_program\n");
            ERR("Try GLSL\n");
        }
3600 3601
        priv_ctx.target_version = ARB;
    }
3602

3603
    if (reg_maps->rt_mask > 1)
3604 3605 3606 3607
    {
        shader_addline(buffer, "OPTION ARB_draw_buffers;\n");
    }

3608 3609
    if (reg_maps->shader_version.major < 3)
    {
3610 3611 3612
        switch (args->super.fog)
        {
            case WINED3D_FFP_PS_FOG_OFF:
3613
                break;
3614
            case WINED3D_FFP_PS_FOG_LINEAR:
3615 3616
                if (gl_info->quirks & WINED3D_QUIRK_BROKEN_ARB_FOG)
                {
3617 3618
                    custom_linear_fog = TRUE;
                    priv_ctx.ps_post_process = TRUE;
3619 3620
                    break;
                }
3621 3622
                shader_addline(buffer, "OPTION ARB_fog_linear;\n");
                break;
3623
            case WINED3D_FFP_PS_FOG_EXP:
3624 3625
                shader_addline(buffer, "OPTION ARB_fog_exp;\n");
                break;
3626
            case WINED3D_FFP_PS_FOG_EXP2:
3627 3628 3629 3630 3631
                shader_addline(buffer, "OPTION ARB_fog_exp2;\n");
                break;
        }
    }

3632 3633 3634 3635
    /* For now always declare the temps. At least the Nvidia assembler optimizes completely
     * unused temps away(but occupies them for the whole shader if they're used once). Always
     * declaring them avoids tricky bookkeeping work
     */
3636 3637 3638
    shader_addline(buffer, "TEMP TA;\n");      /* Used for modifiers */
    shader_addline(buffer, "TEMP TB;\n");      /* Used for modifiers */
    shader_addline(buffer, "TEMP TC;\n");      /* Used for modifiers */
3639
    if(dcl_td) shader_addline(buffer, "TEMP TD;\n"); /* Used for sRGB writing */
3640 3641
    shader_addline(buffer, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
    shader_addline(buffer, "PARAM coefmul = { 2, 4, 8, 16 };\n");
3642 3643
    wined3d_ftoa(eps, ftoa_tmp);
    shader_addline(buffer, "PARAM ps_helper_const = { 0.0, 1.0, %s, 0.0 };\n", ftoa_tmp);
3644

3645 3646
    if (reg_maps->shader_version.major < 2)
    {
3647
        strcpy(fragcolor, "R0");
3648 3649 3650
    }
    else
    {
3651
        if (priv_ctx.ps_post_process)
3652 3653 3654 3655 3656 3657 3658
        {
            if (shader->u.ps.color0_mov)
            {
                sprintf(fragcolor, "R%u", shader->u.ps.color0_reg);
            }
            else
            {
3659 3660 3661
                shader_addline(buffer, "TEMP TMP_COLOR;\n");
                strcpy(fragcolor, "TMP_COLOR");
            }
3662
        } else {
3663
            strcpy(fragcolor, "result.color");
3664
        }
3665 3666
    }

3667 3668 3669
    if (args->super.srgb_correction)
    {
        shader_addline(buffer, "PARAM srgb_consts0 = ");
3670
        shader_arb_append_imm_vec4(buffer, &wined3d_srgb_const[0].x);
3671 3672
        shader_addline(buffer, ";\n");
        shader_addline(buffer, "PARAM srgb_consts1 = ");
3673
        shader_arb_append_imm_vec4(buffer, &wined3d_srgb_const[1].x);
3674
        shader_addline(buffer, ";\n");
3675 3676
    }

3677
    /* Base Declarations */
3678
    shader_generate_arb_declarations(shader, reg_maps, buffer, gl_info, NULL, &priv_ctx);
3679

3680 3681
    for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i)
    {
3682 3683
        unsigned char bump_const;

3684
        if (!(map & 1)) continue;
3685

3686 3687 3688 3689 3690
        bump_const = compiled->numbumpenvmatconsts;
        compiled->bumpenvmatconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED;
        compiled->bumpenvmatconst[bump_const].texunit = i;
        compiled->luminanceconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED;
        compiled->luminanceconst[bump_const].texunit = i;
3691 3692 3693 3694 3695 3696 3697 3698 3699 3700

        /* We can fit the constants into the constant limit for sure because texbem, texbeml, bem and beml are only supported
         * in 1.x shaders, and GL_ARB_fragment_program has a constant limit of 24 constants. So in the worst case we're loading
         * 8 shader constants, 8 bump matrices and 8 luminance parameters and are perfectly fine. (No NP2 fixup on bumpmapped
         * textures due to conditional NP2 restrictions)
         *
         * Use local constants to load the bump env parameters, not program.env. This avoids collisions with d3d constants of
         * shaders in newer shader models. Since the bump env parameters have to share their space with NP2 fixup constants,
         * their location is shader dependent anyway and they cannot be loaded globally.
         */
3701
        compiled->bumpenvmatconst[bump_const].const_num = next_local++;
3702
        shader_addline(buffer, "PARAM bumpenvmat%d = program.local[%d];\n",
3703 3704
                       i, compiled->bumpenvmatconst[bump_const].const_num);
        compiled->numbumpenvmatconsts = bump_const + 1;
3705

3706 3707
        if (!(reg_maps->luminanceparams & (1u << i)))
            continue;
3708

3709
        compiled->luminanceconst[bump_const].const_num = next_local++;
3710
        shader_addline(buffer, "PARAM luminance%d = program.local[%d];\n",
3711
                       i, compiled->luminanceconst[bump_const].const_num);
3712
    }
3713

3714
    for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
3715 3716
    {
        compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED;
3717
        if (reg_maps->integer_constants & (1u << i) && priv_ctx.target_version >= NV2)
3718
        {
3719
            const DWORD *control_values = find_loop_control_values(shader, i);
3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734

            if(control_values)
            {
                shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i,
                                control_values[0], control_values[1], control_values[2]);
            }
            else
            {
                compiled->int_consts[i] = next_local;
                compiled->num_int_consts++;
                shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++);
            }
        }
    }

3735 3736 3737 3738
    if(reg_maps->vpos || reg_maps->usesdsy)
    {
        compiled->ycorrection = next_local;
        shader_addline(buffer, "PARAM ycorrection = program.local[%u];\n", next_local++);
3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750

        if(reg_maps->vpos)
        {
            shader_addline(buffer, "TEMP vpos;\n");
            /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen).
             * ycorrection.y: -1.0(onscreen), 1.0(offscreen)
             * ycorrection.z: 1.0
             * ycorrection.w: 0.0
             */
            shader_addline(buffer, "MAD vpos, fragment.position, ycorrection.zyww, ycorrection.wxww;\n");
            shader_addline(buffer, "FLR vpos.xy, vpos;\n");
        }
3751 3752 3753 3754 3755 3756
    }
    else
    {
        compiled->ycorrection = WINED3D_CONST_NUM_UNUSED;
    }

3757 3758
    /* Load constants to fixup NP2 texcoords if there are still free constants left:
     * Constants (texture dimensions) for the NP2 fixup are loaded as local program parameters. This will consume
3759
     * at most 8 (WINED3D_MAX_FRAGMENT_SAMPLERS / 2) parameters, which is highly unlikely, since the application had to
3760 3761 3762 3763
     * use 16 NP2 textures at the same time. In case that we run out of constants the fixup is simply not
     * applied / activated. This will probably result in wrong rendering of the texture, but will save us from
     * shader compilation errors and the subsequent errors when drawing with this shader. */
    if (priv_ctx.cur_ps_args->super.np2_fixup) {
3764
        unsigned char cur_fixup_sampler = 0;
3765 3766 3767

        struct arb_ps_np2fixup_info* const fixup = priv_ctx.cur_np2fixup_info;
        const WORD map = priv_ctx.cur_ps_args->super.np2_fixup;
3768
        const UINT max_lconsts = gl_info->limits.arb_ps_local_constants;
3769 3770 3771 3772

        fixup->offset = next_local;
        fixup->super.active = 0;

3773
        for (i = 0; i < WINED3D_MAX_FRAGMENT_SAMPLERS; ++i)
3774 3775 3776
        {
            if (!(map & (1u << i)))
                continue;
3777

3778 3779 3780
            if (fixup->offset + (cur_fixup_sampler >> 1) < max_lconsts)
            {
                fixup->super.active |= (1u << i);
3781
                fixup->super.idx[i] = cur_fixup_sampler++;
3782 3783 3784
            }
            else
            {
3785 3786 3787 3788 3789 3790
                FIXME("No free constant found to load NP2 fixup data into shader. "
                      "Sampling from this texture will probably look wrong.\n");
                break;
            }
        }

3791
        fixup->super.num_consts = (cur_fixup_sampler + 1) >> 1;
3792 3793 3794 3795 3796 3797
        if (fixup->super.num_consts) {
            shader_addline(buffer, "PARAM np2fixup[%u] = { program.env[%u..%u] };\n",
                           fixup->super.num_consts, fixup->offset, fixup->super.num_consts + fixup->offset - 1);
        }
    }

3798
    if (shader_priv->clipplane_emulation != ~0U && args->clip)
3799
    {
3800
        shader_addline(buffer, "KIL fragment.texcoord[%u];\n", shader_priv->clipplane_emulation);
3801 3802
    }

3803
    /* Base Shader Body */
3804
    if (FAILED(shader_generate_code(shader, buffer, reg_maps, &priv_ctx, NULL, NULL)))
3805
        return 0;
3806

3807
    if(args->super.srgb_correction) {
3808 3809
        arbfp_add_sRGB_correction(buffer, fragcolor, srgbtmp[0], srgbtmp[1], srgbtmp[2], srgbtmp[3],
                                  priv_ctx.target_version >= NV2);
3810 3811
    }

3812
    if (custom_linear_fog)
3813 3814
        arbfp_add_linear_fog(buffer, fragcolor, "TA");

3815
    if(strcmp(fragcolor, "result.color")) {
3816
        shader_addline(buffer, "MOV result.color, %s;\n", fragcolor);
3817 3818 3819 3820
    }
    shader_addline(buffer, "END\n");

    /* TODO: change to resource.glObjectHandle or something like that */
3821
    GL_EXTCALL(glGenProgramsARB(1, &retval));
3822

3823 3824
    TRACE("Creating a hw pixel shader, prg=%d\n", retval);
    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, retval));
3825

3826
    TRACE("Created hw pixel shader, prg=%d\n", retval);
3827 3828
    if (!shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer->buffer))
        return 0;
3829

3830
    return retval;
3831 3832
}

3833
static int compare_sig(const struct wined3d_shader_signature *sig1, const struct wined3d_shader_signature *sig2)
3834 3835 3836 3837
{
    unsigned int i;
    int ret;

3838 3839 3840 3841
    if (sig1->element_count != sig2->element_count)
        return sig1->element_count < sig2->element_count ? -1 : 1;

    for (i = 0; i < sig1->element_count; ++i)
3842
    {
3843 3844 3845 3846 3847 3848
        const struct wined3d_shader_signature_element *e1, *e2;

        e1 = &sig1->elements[i];
        e2 = &sig2->elements[i];

        if (!e1->semantic_name || !e2->semantic_name)
3849
        {
3850 3851 3852 3853
            /* Compare pointers, not contents. One string is NULL (element
             * does not exist), the other one is not NULL. */
            if (e1->semantic_name != e2->semantic_name)
                return e1->semantic_name < e2->semantic_name ? -1 : 1;
3854 3855 3856
            continue;
        }

3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868
        if ((ret = strcmp(e1->semantic_name, e2->semantic_name)))
            return ret;
        if (e1->semantic_idx != e2->semantic_idx)
            return e1->semantic_idx < e2->semantic_idx ? -1 : 1;
        if (e1->sysval_semantic != e2->sysval_semantic)
            return e1->sysval_semantic < e2->sysval_semantic ? -1 : 1;
        if (e1->component_type != e2->component_type)
            return e1->component_type < e2->component_type ? -1 : 1;
        if (e1->register_idx != e2->register_idx)
            return e1->register_idx < e2->register_idx ? -1 : 1;
        if (e1->mask != e2->mask)
            return e1->mask < e2->mask ? -1 : 1;
3869 3870 3871 3872
    }
    return 0;
}

3873
static void clone_sig(struct wined3d_shader_signature *new, const struct wined3d_shader_signature *sig)
3874
{
3875
    unsigned int i;
3876 3877
    char *name;

3878
    new->element_count = sig->element_count;
3879
    new->elements = heap_calloc(new->element_count, sizeof(*new->elements));
3880
    for (i = 0; i < sig->element_count; ++i)
3881
    {
3882 3883 3884 3885
        new->elements[i] = sig->elements[i];

        if (!new->elements[i].semantic_name)
            continue;
3886 3887

        /* Clone the semantic string */
3888
        name = heap_alloc(strlen(sig->elements[i].semantic_name) + 1);
3889 3890
        strcpy(name, sig->elements[i].semantic_name);
        new->elements[i].semantic_name = name;
3891 3892 3893
    }
}

3894
static DWORD find_input_signature(struct shader_arb_priv *priv, const struct wined3d_shader_signature *sig)
3895 3896 3897 3898
{
    struct wine_rb_entry *entry = wine_rb_get(&priv->signature_tree, sig);
    struct ps_signature *found_sig;

3899
    if (entry)
3900 3901 3902 3903 3904
    {
        found_sig = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry);
        TRACE("Found existing signature %u\n", found_sig->idx);
        return found_sig->idx;
    }
3905
    found_sig = heap_alloc_zero(sizeof(*found_sig));
3906
    clone_sig(&found_sig->sig, sig);
3907 3908 3909 3910 3911 3912 3913 3914 3915
    found_sig->idx = priv->ps_sig_number++;
    TRACE("New signature stored and assigned number %u\n", found_sig->idx);
    if(wine_rb_put(&priv->signature_tree, sig, &found_sig->entry) == -1)
    {
        ERR("Failed to insert program entry.\n");
    }
    return found_sig->idx;
}

3916
static void init_output_registers(const struct wined3d_shader *shader,
3917
        const struct wined3d_shader_signature *ps_input_sig,
3918
        struct shader_arb_ctx_priv *priv_ctx, struct arb_vs_compiled_shader *compiled)
3919 3920
{
    unsigned int i, j;
3921
    static const char * const texcoords[8] =
3922 3923 3924 3925 3926 3927 3928
    {
        "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]",
        "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]"
    };
    /* Write generic input varyings 0 to 7 to result.texcoord[], varying 8 to result.color.primary
     * and varying 9 to result.color.secondary
     */
3929
    static const char * const decl_idx_to_string[MAX_REG_INPUT] =
3930
    {
3931 3932
        "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]",
        "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]",
3933 3934 3935
        "result.color.primary", "result.color.secondary"
    };

3936
    if (!ps_input_sig)
3937 3938 3939 3940 3941 3942 3943 3944 3945
    {
        TRACE("Pixel shader uses builtin varyings\n");
        /* Map builtins to builtins */
        for(i = 0; i < 8; i++)
        {
            priv_ctx->texcrd_output[i] = texcoords[i];
        }
        priv_ctx->color_output[0] = "result.color.primary";
        priv_ctx->color_output[1] = "result.color.secondary";
3946
        priv_ctx->fog_output = "TMP_FOGCOORD";
3947 3948

        /* Map declared regs to builtins. Use "TA" to /dev/null unread output */
3949
        for (i = 0; i < shader->output_signature.element_count; ++i)
3950
        {
3951 3952 3953 3954
            const struct wined3d_shader_signature_element *output = &shader->output_signature.elements[i];

            if (!output->semantic_name)
                continue;
3955

3956
            if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_POSITION))
3957
            {
3958 3959 3960 3961 3962
                TRACE("o%u is TMP_OUT\n", output->register_idx);
                if (!output->semantic_idx)
                    priv_ctx->vs_output[output->register_idx] = "TMP_OUT";
                else
                    priv_ctx->vs_output[output->register_idx] = "TA";
3963
            }
3964
            else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_PSIZE))
3965
            {
3966 3967 3968 3969 3970
                TRACE("o%u is result.pointsize\n", output->register_idx);
                if (!output->semantic_idx)
                    priv_ctx->vs_output[output->register_idx] = "result.pointsize";
                else
                    priv_ctx->vs_output[output->register_idx] = "TA";
3971
            }
3972
            else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_COLOR))
3973
            {
3974 3975 3976 3977 3978 3979
                TRACE("o%u is result.color.?, idx %u\n", output->register_idx, output->semantic_idx);
                if (!output->semantic_idx)
                    priv_ctx->vs_output[output->register_idx] = "result.color.primary";
                else if (output->semantic_idx == 1)
                    priv_ctx->vs_output[output->register_idx] = "result.color.secondary";
                else priv_ctx->vs_output[output->register_idx] = "TA";
3980
            }
3981
            else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_TEXCOORD))
3982
            {
3983 3984 3985 3986 3987
                TRACE("o%u is result.texcoord[%u]\n", output->register_idx, output->semantic_idx);
                if (output->semantic_idx >= 8)
                    priv_ctx->vs_output[output->register_idx] = "TA";
                else
                    priv_ctx->vs_output[output->register_idx] = texcoords[output->semantic_idx];
3988
            }
3989
            else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_FOG))
3990
            {
3991 3992 3993 3994 3995
                TRACE("o%u is result.fogcoord\n", output->register_idx);
                if (output->semantic_idx > 0)
                    priv_ctx->vs_output[output->register_idx] = "TA";
                else
                    priv_ctx->vs_output[output->register_idx] = "result.fogcoord";
3996 3997 3998
            }
            else
            {
3999
                priv_ctx->vs_output[output->register_idx] = "TA";
4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015
            }
        }
        return;
    }

    TRACE("Pixel shader uses declared varyings\n");

    /* Map builtin to declared. /dev/null the results by default to the TA temp reg */
    for(i = 0; i < 8; i++)
    {
        priv_ctx->texcrd_output[i] = "TA";
    }
    priv_ctx->color_output[0] = "TA";
    priv_ctx->color_output[1] = "TA";
    priv_ctx->fog_output = "TA";

4016
    for (i = 0; i < ps_input_sig->element_count; ++i)
4017
    {
4018
        const struct wined3d_shader_signature_element *input = &ps_input_sig->elements[i];
4019

4020 4021
        if (!input->semantic_name)
            continue;
4022 4023 4024 4025 4026 4027 4028

        /* If a declared input register is not written by builtin arguments, don't write to it.
         * GL_NV_vertex_program makes sure the input defaults to 0.0, which is correct with D3D
         *
         * Don't care about POSITION and PSIZE here - this is a builtin vertex shader, position goes
         * to TMP_OUT in any case
         */
4029
        if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_TEXCOORD))
4030
        {
4031 4032
            if (input->semantic_idx < 8)
                priv_ctx->texcrd_output[input->semantic_idx] = decl_idx_to_string[input->register_idx];
4033
        }
4034
        else if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_COLOR))
4035
        {
4036 4037
            if (input->semantic_idx < 2)
                priv_ctx->color_output[input->semantic_idx] = decl_idx_to_string[input->register_idx];
4038
        }
4039
        else if (shader_match_semantic(input->semantic_name, WINED3D_DECL_USAGE_FOG))
4040
        {
4041 4042
            if (!input->semantic_idx)
                priv_ctx->fog_output = decl_idx_to_string[input->register_idx];
4043
        }
4044 4045 4046 4047 4048
        else
        {
            continue;
        }

4049 4050
        if (!strcmp(decl_idx_to_string[input->register_idx], "result.color.primary")
                || !strcmp(decl_idx_to_string[input->register_idx], "result.color.secondary"))
4051 4052 4053
        {
            compiled->need_color_unclamp = TRUE;
        }
4054 4055 4056
    }

    /* Map declared to declared */
4057
    for (i = 0; i < shader->output_signature.element_count; ++i)
4058
    {
4059 4060
        const struct wined3d_shader_signature_element *output = &shader->output_signature.elements[i];

4061
        /* Write unread output to TA to throw them away */
4062 4063 4064 4065
        priv_ctx->vs_output[output->register_idx] = "TA";

        if (!output->semantic_name)
            continue;
4066

4067
        if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_POSITION) && !output->semantic_idx)
4068
        {
4069
            priv_ctx->vs_output[output->register_idx] = "TMP_OUT";
4070 4071
            continue;
        }
4072
        else if (shader_match_semantic(output->semantic_name, WINED3D_DECL_USAGE_PSIZE) && !output->semantic_idx)
4073
        {
4074
            priv_ctx->vs_output[output->register_idx] = "result.pointsize";
4075 4076 4077
            continue;
        }

4078
        for (j = 0; j < ps_input_sig->element_count; ++j)
4079
        {
4080
            const struct wined3d_shader_signature_element *input = &ps_input_sig->elements[j];
4081 4082 4083

            if (!input->semantic_name)
                continue;
4084

4085 4086
            if (!strcmp(input->semantic_name, output->semantic_name)
                    && input->semantic_idx == output->semantic_idx)
4087
            {
4088
                priv_ctx->vs_output[output->register_idx] = decl_idx_to_string[input->register_idx];
4089

4090 4091
                if (!strcmp(priv_ctx->vs_output[output->register_idx], "result.color.primary")
                        || !strcmp(priv_ctx->vs_output[output->register_idx], "result.color.secondary"))
4092 4093 4094
                {
                    compiled->need_color_unclamp = TRUE;
                }
4095 4096 4097 4098 4099
            }
        }
    }
}

4100
/* Context activation is done by the caller. */
4101
static GLuint shader_arb_generate_vshader(const struct wined3d_shader *shader,
4102
        const struct wined3d_gl_info *gl_info, struct wined3d_string_buffer *buffer,
4103
        const struct arb_vs_compile_args *args, struct arb_vs_compiled_shader *compiled,
4104
        const struct wined3d_shader_signature *ps_input_sig)
4105
{
4106 4107
    const struct arb_vshader_private *shader_data = shader->backend_data;
    const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
4108
    GLuint ret;
4109
    DWORD next_local = 0;
4110
    struct shader_arb_ctx_priv priv_ctx;
4111
    unsigned int i;
4112

4113
    memset(&priv_ctx, 0, sizeof(priv_ctx));
4114
    priv_ctx.gl_info = gl_info;
4115
    priv_ctx.cur_vs_args = args;
4116
    list_init(&priv_ctx.control_frames);
4117
    init_output_registers(shader, ps_input_sig, &priv_ctx, compiled);
4118

4119 4120
    /*  Create the hw ARB shader */
    shader_addline(buffer, "!!ARBvp1.0\n");
4121

4122 4123 4124
    /* Always enable the NV extension if available. Unlike fragment shaders, there is no
     * mesurable performance penalty, and we can always make use of it for clipplanes.
     */
4125 4126
    if (gl_info->supported[NV_VERTEX_PROGRAM3])
    {
4127 4128 4129
        shader_addline(buffer, "OPTION NV_vertex_program3;\n");
        priv_ctx.target_version = NV3;
        shader_addline(buffer, "ADDRESS aL;\n");
4130 4131 4132
    }
    else if (gl_info->supported[NV_VERTEX_PROGRAM2_OPTION])
    {
4133 4134
        shader_addline(buffer, "OPTION NV_vertex_program2;\n");
        priv_ctx.target_version = NV2;
4135
        shader_addline(buffer, "ADDRESS aL;\n");
4136 4137 4138 4139
    } else {
        priv_ctx.target_version = ARB;
    }

4140
    shader_addline(buffer, "TEMP TMP_OUT;\n");
4141 4142
    if (reg_maps->fog)
        shader_addline(buffer, "TEMP TMP_FOGCOORD;\n");
4143
    if (need_helper_const(shader_data, reg_maps, gl_info))
4144
    {
4145
        char ftoa_tmp[17];
4146 4147
        wined3d_ftoa(eps, ftoa_tmp);
        shader_addline(buffer, "PARAM helper_const = { 0.0, 1.0, 2.0, %s};\n", ftoa_tmp);
4148
    }
4149
    if (need_rel_addr_const(shader_data, reg_maps, gl_info))
4150 4151
    {
        shader_addline(buffer, "PARAM rel_addr_const = { 0.5, %d.0, 0.0, 0.0 };\n", shader_data->rel_offset);
4152
        shader_addline(buffer, "TEMP A0_SHADOW;\n");
4153
    }
4154

4155
    shader_addline(buffer, "TEMP TA;\n");
4156
    shader_addline(buffer, "TEMP TB;\n");
4157 4158

    /* Base Declarations */
4159 4160
    shader_generate_arb_declarations(shader, reg_maps, buffer, gl_info,
            &priv_ctx.vs_clipplanes, &priv_ctx);
4161

4162
    for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
4163 4164
    {
        compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED;
4165
        if (reg_maps->integer_constants & (1u << i) && priv_ctx.target_version >= NV2)
4166
        {
4167
            const DWORD *control_values = find_loop_control_values(shader, i);
4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181

            if(control_values)
            {
                shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i,
                                control_values[0], control_values[1], control_values[2]);
            }
            else
            {
                compiled->int_consts[i] = next_local;
                compiled->num_int_consts++;
                shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++);
            }
        }
    }
4182 4183

    /* We need a constant to fixup the final position */
4184 4185
    shader_addline(buffer, "PARAM posFixup = program.local[%u];\n", next_local);
    compiled->pos_fixup = next_local++;
4186

4187 4188 4189
    /* Initialize output parameters. GL_ARB_vertex_program does not require special initialization values
     * for output parameters. D3D in theory does not do that either, but some applications depend on a
     * proper initialization of the secondary color, and programs using the fixed function pipeline without
4190
     * a replacement shader depend on the texcoord.w being set properly.
4191 4192
     *
     * GL_NV_vertex_program defines that all output values are initialized to {0.0, 0.0, 0.0, 1.0}. This
Austin English's avatar
Austin English committed
4193
     * assertion is in effect even when using GL_ARB_vertex_program without any NV specific additions. So
4194
     * skip this if NV_vertex_program is supported. Otherwise, initialize the secondary color. For the tex-
4195
     * coords, we have a flag in the opengl caps. Many cards do not require the texcoord being set, and
4196 4197
     * this can eat a number of instructions, so skip it unless this cap is set as well
     */
4198 4199
    if (!gl_info->supported[NV_VERTEX_PROGRAM])
    {
4200 4201
        const char *color_init = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_0001);
        shader_addline(buffer, "MOV result.color.secondary, %s;\n", color_init);
4202 4203
    }

4204 4205
    /* The shader starts with the main function */
    priv_ctx.in_main_func = TRUE;
4206
    /* Base Shader Body */
4207
    if (FAILED(shader_generate_code(shader, buffer, reg_maps, &priv_ctx, NULL, NULL)))
4208
        return -1;
4209

4210 4211
    if (!priv_ctx.footer_written) vshader_add_footer(&priv_ctx,
            shader_data, args, reg_maps, gl_info, buffer);
4212 4213 4214 4215

    shader_addline(buffer, "END\n");

    /* TODO: change to resource.glObjectHandle or something like that */
4216
    GL_EXTCALL(glGenProgramsARB(1, &ret));
4217

4218 4219
    TRACE("Creating a hw vertex shader, prg=%d\n", ret);
    GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, ret));
4220

4221
    TRACE("Created hw vertex shader, prg=%d\n", ret);
4222 4223
    if (!shader_arb_compile(gl_info, GL_VERTEX_PROGRAM_ARB, buffer->buffer))
        return -1;
4224

4225
    return ret;
4226 4227
}

4228
/* Context activation is done by the caller. */
4229
static struct arb_ps_compiled_shader *find_arb_pshader(struct wined3d_context_gl *context_gl,
4230
        struct wined3d_shader *shader, const struct arb_ps_compile_args *args)
4231
{
4232
    const struct wined3d_d3d_info *d3d_info = context_gl->c.d3d_info;
4233
    const struct wined3d_gl_info *gl_info = context_gl->gl_info;
4234
    struct wined3d_device *device = shader->device;
4235 4236
    UINT i;
    DWORD new_size;
4237
    struct arb_ps_compiled_shader *new_array;
4238
    struct wined3d_string_buffer buffer;
4239 4240 4241
    struct arb_pshader_private *shader_data;
    GLuint ret;

4242
    if (!shader->backend_data)
4243
    {
4244 4245
        struct shader_arb_priv *priv = device->shader_priv;

4246
        shader->backend_data = heap_alloc_zero(sizeof(*shader_data));
4247 4248
        shader_data = shader->backend_data;
        shader_data->clamp_consts = shader->reg_maps.shader_version.major == 1;
4249

4250
        if (shader->reg_maps.shader_version.major < 3)
4251
            shader_data->input_signature_idx = ~0U;
4252
        else
4253
            shader_data->input_signature_idx = find_input_signature(priv, &shader->input_signature);
4254 4255 4256

        TRACE("Shader got assigned input signature index %u\n", shader_data->input_signature_idx);

4257
        if (!d3d_info->vs_clipping)
4258
            shader_data->clipplane_emulation = shader_find_free_input_register(&shader->reg_maps,
4259
                    d3d_info->limits.ffp_blend_stages - 1);
4260 4261
        else
            shader_data->clipplane_emulation = ~0U;
4262
    }
4263
    shader_data = shader->backend_data;
4264 4265 4266 4267 4268

    /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
     * so a linear search is more performant than a hashmap or a binary search
     * (cache coherency etc)
     */
4269 4270 4271
    for (i = 0; i < shader_data->num_gl_shaders; ++i)
    {
        if (!memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args)))
4272
            return &shader_data->gl_shaders[i];
4273 4274 4275
    }

    TRACE("No matching GL shader found, compiling a new shader\n");
4276 4277
    if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
        if (shader_data->num_gl_shaders)
4278
        {
4279
            new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
4280
            new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders,
4281
                                    new_size * sizeof(*shader_data->gl_shaders));
4282 4283 4284 4285
        }
        else
        {
            new_array = heap_alloc_zero(sizeof(*shader_data->gl_shaders));
4286 4287 4288 4289 4290 4291 4292
            new_size = 1;
        }

        if(!new_array) {
            ERR("Out of memory\n");
            return 0;
        }
4293 4294
        shader_data->gl_shaders = new_array;
        shader_data->shader_array_size = new_size;
4295 4296
    }

4297
    shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
4298

4299
    if (!string_buffer_init(&buffer))
4300 4301 4302 4303 4304
    {
        ERR("Failed to initialize shader buffer.\n");
        return 0;
    }

4305 4306
    ret = shader_arb_generate_pshader(shader, gl_info, &buffer, args,
            &shader_data->gl_shaders[shader_data->num_gl_shaders]);
4307
    string_buffer_free(&buffer);
4308
    shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret;
4309

4310
    return &shader_data->gl_shaders[shader_data->num_gl_shaders++];
4311 4312
}

4313
static inline BOOL vs_args_equal(const struct arb_vs_compile_args *stored, const struct arb_vs_compile_args *new,
4314
                                 const DWORD use_map, BOOL skip_int) {
4315
    if((stored->super.swizzle_map & use_map) != new->super.swizzle_map) return FALSE;
4316
    if(stored->super.clip_enabled != new->super.clip_enabled) return FALSE;
4317
    if(stored->super.fog_src != new->super.fog_src) return FALSE;
4318
    if(stored->clip.boolclip_compare != new->clip.boolclip_compare) return FALSE;
4319
    if(stored->ps_signature != new->ps_signature) return FALSE;
4320
    if(stored->vertex.samplers_compare != new->vertex.samplers_compare) return FALSE;
4321 4322
    if(skip_int) return TRUE;

4323
    return !memcmp(stored->loop_ctrl, new->loop_ctrl, sizeof(stored->loop_ctrl));
4324 4325
}

4326
/* Return the compiled GL vertex program variant of "shader" matching the
 * compile arguments "args", compiling and caching a new variant if none of
 * the cached ones matches (see vs_args_equal() for the comparison).
 *
 * On first use the per-shader backend data is allocated; with the
 * WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT quirk a relative addressing offset is
 * picked from the shader's minimum / maximum relative offsets. Returns NULL
 * if an allocation or the string buffer initialisation fails. */
static struct arb_vs_compiled_shader *find_arb_vshader(struct wined3d_shader *shader,
        const struct wined3d_gl_info *gl_info, DWORD use_map, const struct arb_vs_compile_args *args,
        const struct wined3d_shader_signature *ps_input_sig)
{
    UINT i;
    DWORD new_size;
    struct arb_vs_compiled_shader *new_array;
    struct wined3d_string_buffer buffer;
    struct arb_vshader_private *shader_data;
    GLuint ret;

    if (!shader->backend_data)
    {
        const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;

        /* Do not publish (or dereference) the backend data before we know
         * the allocation succeeded. */
        if (!(shader_data = heap_alloc_zero(sizeof(*shader_data))))
        {
            ERR("Out of memory\n");
            return NULL;
        }
        shader->backend_data = shader_data;

        if ((gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT)
                && reg_maps->min_rel_offset <= reg_maps->max_rel_offset)
        {
            if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 127)
            {
                FIXME("The difference between the minimum and maximum relative offset is > 127.\n");
                FIXME("Which this OpenGL implementation does not support. Try using GLSL.\n");
                FIXME("Min: %u, Max: %u.\n", reg_maps->min_rel_offset, reg_maps->max_rel_offset);
            }
            else if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 63)
                shader_data->rel_offset = reg_maps->min_rel_offset + 63;
            else if (reg_maps->max_rel_offset > 63)
                shader_data->rel_offset = reg_maps->min_rel_offset;
        }
    }
    shader_data = shader->backend_data;

    /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
     * so a linear search is more performant than a hashmap or a binary search
     * (cache coherency etc)
     */
    for (i = 0; i < shader_data->num_gl_shaders; i++)
    {
        if (vs_args_equal(&shader_data->gl_shaders[i].args, args,
                use_map, gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]))
        {
            return &shader_data->gl_shaders[i];
        }
    }

    TRACE("No matching GL shader found, compiling a new shader\n");

    if (shader_data->shader_array_size == shader_data->num_gl_shaders)
    {
        if (shader_data->num_gl_shaders)
        {
            /* Grow by 50%, but by at least one entry. */
            new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
            new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders,
                                    new_size * sizeof(*shader_data->gl_shaders));
        }
        else
        {
            new_array = heap_alloc_zero(sizeof(*shader_data->gl_shaders));
            new_size = 1;
        }

        if (!new_array)
        {
            /* The old array (if any) is still owned by shader_data. */
            ERR("Out of memory\n");
            return NULL;
        }
        shader_data->gl_shaders = new_array;
        shader_data->shader_array_size = new_size;
    }

    /* The new entry only becomes visible to lookups once num_gl_shaders is
     * incremented at the very end. */
    shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;

    if (!string_buffer_init(&buffer))
    {
        ERR("Failed to initialize shader buffer.\n");
        return NULL;
    }

    ret = shader_arb_generate_vshader(shader, gl_info, &buffer, args,
            &shader_data->gl_shaders[shader_data->num_gl_shaders],
            ps_input_sig);
    string_buffer_free(&buffer);
    shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret;

    return &shader_data->gl_shaders[shader_data->num_gl_shaders++];
}

4413
static void find_arb_ps_compile_args(const struct wined3d_state *state,
4414
        const struct wined3d_context_gl *context_gl, const struct wined3d_shader *shader,
4415
        struct arb_ps_compile_args *args)
4416
{
4417
    const struct wined3d_d3d_info *d3d_info = context_gl->c.d3d_info;
4418
    const struct wined3d_gl_info *gl_info = context_gl->gl_info;
4419
    int i;
4420
    WORD int_skip;
4421

4422
    find_ps_compile_args(state, shader, context_gl->c.stream_info.position_transformed, &args->super, &context_gl->c);
4423 4424

    /* This forces all local boolean constants to 1 to make them stateblock independent */
4425
    args->bools = shader->reg_maps.local_bool_consts;
4426

4427
    for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
4428
    {
4429
        if (state->ps_consts_b[i])
4430
            args->bools |= ( 1u << i);
4431
    }
4432

4433 4434 4435 4436
    /* Only enable the clip plane emulation KIL if at least one clipplane is enabled. The KIL instruction
     * is quite expensive because it forces the driver to disable early Z discards. It is cheaper to
     * duplicate the shader than have a no-op KIL instruction in every shader
     */
4437
    if (!d3d_info->vs_clipping && use_vs(state)
4438 4439
            && state->render_states[WINED3D_RS_CLIPPING]
            && state->render_states[WINED3D_RS_CLIPPLANEENABLE])
4440 4441 4442 4443
        args->clip = 1;
    else
        args->clip = 0;

4444
    /* Skip if unused or local, or supported natively */
4445
    int_skip = ~shader->reg_maps.integer_constants | shader->reg_maps.local_int_consts;
4446
    if (int_skip == 0xffff || gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION])
4447
    {
4448
        memset(args->loop_ctrl, 0, sizeof(args->loop_ctrl));
4449 4450 4451
        return;
    }

4452
    for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
4453
    {
4454
        if (int_skip & (1u << i))
4455 4456 4457 4458 4459 4460 4461
        {
            args->loop_ctrl[i][0] = 0;
            args->loop_ctrl[i][1] = 0;
            args->loop_ctrl[i][2] = 0;
        }
        else
        {
4462 4463 4464
            args->loop_ctrl[i][0] = state->ps_consts_i[i].x;
            args->loop_ctrl[i][1] = state->ps_consts_i[i].y;
            args->loop_ctrl[i][2] = state->ps_consts_i[i].z;
4465 4466
        }
    }
4467 4468
}

4469
static void find_arb_vs_compile_args(const struct wined3d_state *state,
4470
        const struct wined3d_context_gl *context_gl, const struct wined3d_shader *shader,
4471
        struct arb_vs_compile_args *args)
4472
{
4473
    const struct wined3d_d3d_info *d3d_info = context_gl->c.d3d_info;
4474
    const struct wined3d_gl_info *gl_info = context_gl->gl_info;
4475
    const struct wined3d_device *device = shader->device;
4476
    const struct wined3d_adapter *adapter = device->adapter;
4477
    int i;
4478
    WORD int_skip;
4479

4480
    find_vs_compile_args(state, shader, context_gl->c.stream_info.swizzle_map, &args->super, &context_gl->c);
4481

4482
    args->clip.boolclip_compare = 0;
4483
    if (use_ps(state))
4484
    {
4485
        const struct wined3d_shader *ps = state->shader[WINED3D_SHADER_TYPE_PIXEL];
4486
        const struct arb_pshader_private *shader_priv = ps->backend_data;
4487
        args->ps_signature = shader_priv->input_signature_idx;
4488

4489
        args->clip.boolclip.clip_texcoord = shader_priv->clipplane_emulation + 1;
4490 4491 4492 4493
    }
    else
    {
        args->ps_signature = ~0;
4494
        if (!d3d_info->vs_clipping && adapter->fragment_pipe == &arbfp_fragment_pipeline)
4495
            args->clip.boolclip.clip_texcoord = ffp_clip_emul(&context_gl->c) ? d3d_info->limits.ffp_blend_stages : 0;
4496
        /* Otherwise: Setting boolclip_compare set clip_texcoord to 0 */
4497 4498
    }

4499
    if (args->clip.boolclip.clip_texcoord)
4500
    {
4501 4502
        if (state->render_states[WINED3D_RS_CLIPPING])
            args->clip.boolclip.clipplane_mask = (unsigned char)state->render_states[WINED3D_RS_CLIPPLANEENABLE];
4503
        /* clipplane_mask was set to 0 by setting boolclip_compare to 0 */
4504 4505
    }

4506
    /* This forces all local boolean constants to 1 to make them stateblock independent */
4507
    args->clip.boolclip.bools = shader->reg_maps.local_bool_consts;
4508
    /* TODO: Figure out if it would be better to store bool constants as bitmasks in the stateblock */
4509
    for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i)
4510
    {
4511
        if (state->vs_consts_b[i])
4512
            args->clip.boolclip.bools |= (1u << i);
4513 4514
    }

4515 4516 4517
    args->vertex.samplers[0] = context_gl->tex_unit_map[WINED3D_MAX_FRAGMENT_SAMPLERS + 0];
    args->vertex.samplers[1] = context_gl->tex_unit_map[WINED3D_MAX_FRAGMENT_SAMPLERS + 1];
    args->vertex.samplers[2] = context_gl->tex_unit_map[WINED3D_MAX_FRAGMENT_SAMPLERS + 2];
4518
    args->vertex.samplers[3] = 0;
4519

4520
    /* Skip if unused or local */
4521
    int_skip = ~shader->reg_maps.integer_constants | shader->reg_maps.local_int_consts;
4522 4523
    /* This is about flow control, not clipping. */
    if (int_skip == 0xffff || gl_info->supported[NV_VERTEX_PROGRAM2_OPTION])
4524
    {
4525
        memset(args->loop_ctrl, 0, sizeof(args->loop_ctrl));
4526 4527 4528
        return;
    }

4529
    for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
4530
    {
4531
        if (int_skip & (1u << i))
4532 4533 4534 4535 4536 4537 4538
        {
            args->loop_ctrl[i][0] = 0;
            args->loop_ctrl[i][1] = 0;
            args->loop_ctrl[i][2] = 0;
        }
        else
        {
4539 4540 4541
            args->loop_ctrl[i][0] = state->vs_consts_i[i].x;
            args->loop_ctrl[i][1] = state->vs_consts_i[i].y;
            args->loop_ctrl[i][2] = state->vs_consts_i[i].z;
4542 4543
        }
    }
4544 4545
}

4546
/* Context activation is done by the caller. */
4547
static void shader_arb_select(void *shader_priv, struct wined3d_context *context,
4548
        const struct wined3d_state *state)
4549
{
4550
    struct wined3d_context_gl *context_gl = wined3d_context_gl(context);
4551
    const struct wined3d_gl_info *gl_info = context_gl->gl_info;
4552
    struct shader_arb_priv *priv = shader_priv;
4553
    int i;
4554

4555
    /* Deal with pixel shaders first so the vertex shader arg function has the input signature ready */
4556
    if (use_ps(state))
4557
    {
4558
        struct wined3d_shader *ps = state->shader[WINED3D_SHADER_TYPE_PIXEL];
4559
        struct arb_ps_compile_args compile_args;
4560
        struct arb_ps_compiled_shader *compiled;
4561

4562
        TRACE("Using pixel shader %p.\n", ps);
4563
        find_arb_ps_compile_args(state, context_gl, ps, &compile_args);
4564
        compiled = find_arb_pshader(context_gl, ps, &compile_args);
4565 4566
        priv->current_fprogram_id = compiled->prgId;
        priv->compiled_fprog = compiled;
4567 4568 4569 4570 4571

        /* Bind the fragment program */
        GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id));
        checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id);");

4572
        if (!priv->use_arbfp_fixed_func)
4573
            priv->fragment_pipe->fp_enable(context, FALSE);
4574

4575 4576 4577 4578
        /* Enable OpenGL fragment programs. */
        gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB);
        checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB);");

4579
        TRACE("Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB\n", priv->current_fprogram_id);
4580

4581 4582 4583
        /* Pixel Shader 1.x constants are clamped to [-1;1], Pixel Shader 2.0 constants are not. If switching between
         * a 1.x and newer shader, reload the first 8 constants
         */
4584
        if (priv->last_ps_const_clamped != ((struct arb_pshader_private *)ps->backend_data)->clamp_consts)
4585
        {
4586
            priv->last_ps_const_clamped = ((struct arb_pshader_private *)ps->backend_data)->clamp_consts;
4587
            priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, 8);
4588 4589
            for(i = 0; i < 8; i++)
            {
4590
                priv->pshader_const_dirty[i] = 1;
4591 4592
            }
            /* Also takes care of loading local constants */
4593
            shader_arb_load_constants_internal(shader_priv, context_gl, state, TRUE, FALSE, TRUE);
4594 4595 4596
        }
        else
        {
4597
            UINT rt_height = state->fb.render_targets[0]->height;
4598
            shader_arb_ps_local_constants(compiled, context_gl, state, rt_height);
4599
        }
4600 4601

        /* Force constant reloading for the NP2 fixup (see comment in shader_glsl_select for more info) */
4602
        if (compiled->np2fixup_info.super.active)
4603
            context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP;
4604 4605 4606

        if (ps->load_local_constsF)
            context->constant_update_mask |= WINED3D_SHADER_CONST_PS_F;
4607
    }
4608
    else
4609
    {
4610 4611 4612 4613 4614 4615 4616 4617 4618 4619
        if (gl_info->supported[ARB_FRAGMENT_PROGRAM] && !priv->use_arbfp_fixed_func)
        {
            /* Disable only if we're not using arbfp fixed function fragment
             * processing. If this is used, keep GL_FRAGMENT_PROGRAM_ARB
             * enabled, and the fixed function pipeline will bind the fixed
             * function replacement shader. */
            gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB);
            checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
            priv->current_fprogram_id = 0;
        }
4620
        priv->fragment_pipe->fp_enable(context, TRUE);
4621
    }
4622

4623
    if (use_vs(state))
4624
    {
4625
        struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX];
4626 4627
        struct arb_vs_compile_args compile_args;
        struct arb_vs_compiled_shader *compiled;
4628
        const struct wined3d_shader_signature *ps_input_sig;
4629

4630
        TRACE("Using vertex shader %p\n", vs);
4631
        find_arb_vs_compile_args(state, context_gl, vs, &compile_args);
4632 4633 4634 4635 4636 4637 4638

        /* Instead of searching for the signature in the signature list, read the one from the
         * current pixel shader. It's maybe not the shader where the signature came from, but it
         * is the same signature and faster to find. */
        if (compile_args.ps_signature == ~0U)
            ps_input_sig = NULL;
        else
4639
            ps_input_sig = &state->shader[WINED3D_SHADER_TYPE_PIXEL]->input_signature;
4640

4641
        compiled = find_arb_vshader(vs, gl_info, context->stream_info.use_map,
4642
                &compile_args, ps_input_sig);
4643 4644 4645 4646 4647 4648 4649
        priv->current_vprogram_id = compiled->prgId;
        priv->compiled_vprog = compiled;

        /* Bind the vertex program */
        GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id));
        checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id);");

4650
        priv->vertex_pipe->vp_enable(context, FALSE);
4651

4652
        /* Enable OpenGL vertex programs */
4653
        gl_info->gl_ops.gl.p_glEnable(GL_VERTEX_PROGRAM_ARB);
4654
        checkGLcall("glEnable(GL_VERTEX_PROGRAM_ARB);");
4655
        TRACE("Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB\n", priv->current_vprogram_id);
4656
        shader_arb_vs_local_constants(compiled, context_gl, state);
4657 4658 4659 4660

        if(priv->last_vs_color_unclamp != compiled->need_color_unclamp) {
            priv->last_vs_color_unclamp = compiled->need_color_unclamp;

4661 4662
            if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT])
            {
4663 4664 4665 4666 4667 4668
                GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, !compiled->need_color_unclamp));
                checkGLcall("glClampColorARB");
            } else {
                FIXME("vertex color clamp needs to be changed, but extension not supported.\n");
            }
        }
4669 4670 4671

        if (vs->load_local_constsF)
            context->constant_update_mask |= WINED3D_SHADER_CONST_VS_F;
4672
    }
4673 4674 4675 4676 4677 4678 4679 4680
    else
    {
        if (gl_info->supported[ARB_VERTEX_PROGRAM])
        {
            priv->current_vprogram_id = 0;
            gl_info->gl_ops.gl.p_glDisable(GL_VERTEX_PROGRAM_ARB);
            checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)");
        }
4681
        priv->vertex_pipe->vp_enable(context, TRUE);
4682 4683 4684
    }
}

4685 4686 4687 4688 4689
/* Compute shaders cannot be selected with this backend; the call only logs
 * an error and ignores all of its arguments. */
static void shader_arb_select_compute(void *shader_priv, struct wined3d_context *context,
        const struct wined3d_state *state)
{
    ERR("Compute pipeline not supported by the ARB shader backend.\n");
}
4690 4691

/* Context activation is done by the caller. */
4692
static void shader_arb_disable(void *shader_priv, struct wined3d_context *context)
4693
{
4694 4695
    struct wined3d_context_gl *context_gl = wined3d_context_gl(context);
    const struct wined3d_gl_info *gl_info = context_gl->gl_info;
4696 4697 4698 4699 4700 4701 4702 4703
    struct shader_arb_priv *priv = shader_priv;

    if (gl_info->supported[ARB_FRAGMENT_PROGRAM])
    {
        gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB);
        checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
        priv->current_fprogram_id = 0;
    }
4704
    priv->fragment_pipe->fp_enable(context, FALSE);
4705 4706

    if (gl_info->supported[ARB_VERTEX_PROGRAM])
4707
    {
4708
        priv->current_vprogram_id = 0;
4709
        gl_info->gl_ops.gl.p_glDisable(GL_VERTEX_PROGRAM_ARB);
4710 4711
        checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)");
    }
4712
    priv->vertex_pipe->vp_enable(context, FALSE);
4713 4714 4715 4716 4717 4718 4719

    if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT] && priv->last_vs_color_unclamp)
    {
        GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, GL_FIXED_ONLY_ARB));
        checkGLcall("glClampColorARB");
        priv->last_vs_color_unclamp = FALSE;
    }
4720

4721 4722
    context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL)
            | (1u << WINED3D_SHADER_TYPE_VERTEX)
4723
            | (1u << WINED3D_SHADER_TYPE_GEOMETRY)
4724
            | (1u << WINED3D_SHADER_TYPE_HULL)
4725 4726
            | (1u << WINED3D_SHADER_TYPE_DOMAIN)
            | (1u << WINED3D_SHADER_TYPE_COMPUTE);
4727 4728
}

4729
static void shader_arb_destroy(struct wined3d_shader *shader)
4730
{
4731
    struct wined3d_device *device = shader->device;
4732 4733 4734 4735 4736 4737 4738 4739 4740
    const struct wined3d_gl_info *gl_info;
    struct wined3d_context *context;
    unsigned int i;

    /* This can happen if a shader was never compiled */
    if (!shader->backend_data)
        return;

    context = context_acquire(device, NULL, 0);
4741
    gl_info = wined3d_context_gl(context)->gl_info;
4742

4743
    if (shader_is_pshader_version(shader->reg_maps.shader_version.type))
4744
    {
4745
        struct arb_pshader_private *shader_data = shader->backend_data;
4746

4747 4748
        for (i = 0; i < shader_data->num_gl_shaders; ++i)
            GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId));
4749

4750
        heap_free(shader_data->gl_shaders);
4751 4752 4753
    }
    else
    {
4754
        struct arb_vshader_private *shader_data = shader->backend_data;
4755

4756 4757
        for (i = 0; i < shader_data->num_gl_shaders; ++i)
            GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId));
4758

4759 4760
        heap_free(shader_data->gl_shaders);
    }
4761

4762
    checkGLcall("delete programs");
4763

4764
    context_release(context);
4765

4766 4767
    heap_free(shader->backend_data);
    shader->backend_data = NULL;
4768 4769
}

4770 4771 4772
static int sig_tree_compare(const void *key, const struct wine_rb_entry *entry)
{
    struct ps_signature *e = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry);
4773
    return compare_sig(key, &e->sig);
4774 4775
}

4776
static HRESULT shader_arb_alloc(struct wined3d_device *device, const struct wined3d_vertex_pipe_ops *vertex_pipe,
4777
        const struct wined3d_fragment_pipe_ops *fragment_pipe)
4778
{
4779
    const struct wined3d_d3d_info *d3d_info = &device->adapter->d3d_info;
4780
    struct fragment_caps fragment_caps;
4781
    void *vertex_priv, *fragment_priv;
4782 4783 4784 4785
    struct shader_arb_priv *priv;

    if (!(priv = heap_alloc_zero(sizeof(*priv))))
        return E_OUTOFMEMORY;
4786 4787 4788 4789

    if (!(vertex_priv = vertex_pipe->vp_alloc(&arb_program_shader_backend, priv)))
    {
        ERR("Failed to initialize vertex pipe.\n");
4790
        heap_free(priv);
4791 4792
        return E_FAIL;
    }
4793

4794
    if (!(fragment_priv = fragment_pipe->alloc_private(&arb_program_shader_backend, priv)))
4795
    {
4796
        ERR("Failed to initialize fragment pipe.\n");
4797
        vertex_pipe->vp_free(device, NULL);
4798
        heap_free(priv);
4799
        return E_FAIL;
4800
    }
4801 4802

    memset(priv->vshader_const_dirty, 1,
4803
           sizeof(*priv->vshader_const_dirty) * d3d_info->limits.vs_uniform_count);
4804
    memset(priv->pshader_const_dirty, 1,
4805
            sizeof(*priv->pshader_const_dirty) * d3d_info->limits.ps_uniform_count);
4806

4807
    wine_rb_init(&priv->signature_tree, sig_tree_compare);
4808

4809 4810
    priv->vertex_pipe = vertex_pipe;
    priv->fragment_pipe = fragment_pipe;
4811
    fragment_pipe->get_caps(device->adapter, &fragment_caps);
4812
    priv->ffp_proj_control = fragment_caps.wined3d_caps & WINED3D_FRAGMENT_CAP_PROJ_CONTROL;
4813 4814

    device->vertex_priv = vertex_priv;
4815
    device->fragment_priv = fragment_priv;
4816
    device->shader_priv = priv;
4817

4818 4819 4820
    return WINED3D_OK;
}

4821 4822 4823
/* Tree destruction callback: frees one signature entry, including the
 * semantic name of every element and the element array itself. The
 * "context" argument is unused. */
static void release_signature(struct wine_rb_entry *entry, void *context)
{
    struct ps_signature *signature = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry);
    unsigned int idx;

    for (idx = 0; idx < signature->sig.element_count; ++idx)
        heap_free((char *)signature->sig.elements[idx].semantic_name);

    heap_free(signature->sig.elements);
    heap_free(signature);
}

4834
/* Context activation is done by the caller. */
4835
static void shader_arb_free(struct wined3d_device *device, struct wined3d_context *context)
4836 4837
{
    struct shader_arb_priv *priv = device->shader_priv;
4838

4839
    wine_rb_destroy(&priv->signature_tree, release_signature, NULL);
4840 4841
    priv->fragment_pipe->free_private(device, context);
    priv->vertex_pipe->vp_free(device, context);
4842
    heap_free(device->shader_priv);
4843 4844
}

4845
static BOOL shader_arb_allocate_context_data(struct wined3d_context *context)
4846
{
4847 4848 4849 4850 4851
    return TRUE;
}

static void shader_arb_free_context_data(struct wined3d_context *context)
{
4852
    struct shader_arb_priv *priv;
4853

4854
    priv = context->device->shader_priv;
4855 4856
    if (priv->last_context == context)
        priv->last_context = NULL;
4857 4858
}

4859 4860
/* No per-context state needs initialising for this backend. */
static void shader_arb_init_context_state(struct wined3d_context *context)
{
}

4861
static void shader_arb_get_caps(const struct wined3d_adapter *adapter, struct shader_caps *caps)
4862
{
4863 4864
    const struct wined3d_gl_info *gl_info = &adapter->gl_info;

4865 4866
    if (gl_info->supported[ARB_VERTEX_PROGRAM])
    {
4867
        DWORD vs_consts;
4868
        UINT vs_version;
4869 4870 4871 4872 4873 4874 4875 4876 4877 4878

        /* 96 is the minimum allowed value of MAX_PROGRAM_ENV_PARAMETERS_ARB
         * for vertex programs. If the native limit is less than that it's
         * not very useful, and e.g. Mesa swrast returns 0, probably to
         * indicate it's a software implementation. */
        if (gl_info->limits.arb_vs_native_constants < 96)
            vs_consts = gl_info->limits.arb_vs_float_constants;
        else
            vs_consts = min(gl_info->limits.arb_vs_float_constants, gl_info->limits.arb_vs_native_constants);

4879
        if (gl_info->supported[NV_VERTEX_PROGRAM3])
4880
        {
4881
            vs_version = 3;
4882
            TRACE("Hardware vertex shader version 3.0 enabled (NV_VERTEX_PROGRAM3)\n");
4883
        }
4884
        else if (vs_consts >= 256)
4885 4886
        {
            /* Shader Model 2.0 requires at least 256 vertex shader constants */
4887
            vs_version = 2;
4888
            TRACE("Hardware vertex shader version 2.0 enabled (ARB_PROGRAM)\n");
4889 4890 4891
        }
        else
        {
4892
            vs_version = 1;
4893
            TRACE("Hardware vertex shader version 1.1 enabled (ARB_PROGRAM)\n");
4894
        }
4895
        caps->vs_version = min(wined3d_settings.max_sm_vs, vs_version);
4896
        caps->vs_uniform_count = min(WINED3D_MAX_VS_CONSTS_F, vs_consts);
4897
    }
4898 4899
    else
    {
4900 4901
        caps->vs_version = 0;
        caps->vs_uniform_count = 0;
4902
    }
4903

4904
    caps->hs_version = 0;
4905
    caps->ds_version = 0;
4906
    caps->gs_version = 0;
4907
    caps->cs_version = 0;
4908

4909 4910
    if (gl_info->supported[ARB_FRAGMENT_PROGRAM])
    {
4911
        DWORD ps_consts;
4912
        UINT ps_version;
4913 4914 4915 4916 4917 4918 4919 4920

        /* Similar as above for vertex programs, but the minimum for fragment
         * programs is 24. */
        if (gl_info->limits.arb_ps_native_constants < 24)
            ps_consts = gl_info->limits.arb_ps_float_constants;
        else
            ps_consts = min(gl_info->limits.arb_ps_float_constants, gl_info->limits.arb_ps_native_constants);

4921
        if (gl_info->supported[NV_FRAGMENT_PROGRAM2])
4922
        {
4923
            ps_version = 3;
4924
            TRACE("Hardware pixel shader version 3.0 enabled (NV_FRAGMENT_PROGRAM2)\n");
4925
        }
4926
        else if (ps_consts >= 32)
4927
        {
4928
            /* Shader Model 2.0 requires at least 32 pixel shader constants */
4929
            ps_version = 2;
4930
            TRACE("Hardware pixel shader version 2.0 enabled (ARB_PROGRAM)\n");
4931 4932 4933
        }
        else
        {
4934
            ps_version = 1;
4935
            TRACE("Hardware pixel shader version 1.4 enabled (ARB_PROGRAM)\n");
4936
        }
4937
        caps->ps_version = min(wined3d_settings.max_sm_ps, ps_version);
4938
        caps->ps_uniform_count = min(WINED3D_MAX_PS_CONSTS_F, ps_consts);
4939
        caps->ps_1x_max_value = 8.0f;
4940
    }
4941 4942
    else
    {
4943 4944 4945
        caps->ps_version = 0;
        caps->ps_uniform_count = 0;
        caps->ps_1x_max_value = 0.0f;
4946
    }
4947

4948
    caps->varying_count = 0;
4949
    caps->wined3d_caps = WINED3D_SHADER_CAP_SRGB_WRITE;
4950 4951
    if (use_nv_clip(gl_info))
        caps->wined3d_caps |= WINED3D_SHADER_CAP_VS_CLIPPING;
4952 4953
}

4954 4955
static BOOL shader_arb_color_fixup_supported(struct color_fixup_desc fixup)
{
4956
    /* We support everything except complex conversions. */
4957
    return !is_complex_fixup(fixup);
4958 4959
}

4960
static void shader_arb_add_instruction_modifiers(const struct wined3d_shader_instruction *ins) {
4961 4962
    DWORD shift;
    char write_mask[20], regstr[50];
4963
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
4964 4965 4966 4967 4968 4969 4970
    BOOL is_color = FALSE;
    const struct wined3d_shader_dst_param *dst;

    if (!ins->dst_count) return;

    dst = &ins->dst[0];
    shift = dst->shift;
4971
    if (!shift) return; /* Saturate alone is handled by the instructions */
4972 4973

    shader_arb_get_write_mask(ins, dst, write_mask);
4974
    shader_arb_get_register_name(ins, &dst->reg, regstr, &is_color);
4975

4976 4977 4978 4979 4980
    /* Generate a line that does the output modifier computation
     * FIXME: _SAT vs shift? _SAT alone is already handled in the instructions, if this
     * maps problems in e.g. _d4_sat modify shader_arb_get_modifier
     */
    shader_addline(buffer, "MUL%s %s%s, %s, %s;\n", shader_arb_get_modifier(ins),
4981
                   regstr, write_mask, regstr, shift_tab[shift]);
4982 4983
}

4984 4985
/* Opcode dispatch table of the ARB backend.
 *
 * shader_arb_handle_instruction() indexes this table with ins->handler_idx to
 * find the code generator for an instruction. A NULL entry means that no
 * generator is registered: for such an opcode the dispatcher prints a FIXME
 * and emits nothing. WINED3DSIH_IF is NULL on purpose, boolean ifs are
 * resolved by the dispatcher itself and never reach the table lookup.
 *
 * The initializer is positional, so the order of the entries has to match the
 * order of the WINED3DSIH_* values named in the per-entry comments. */
static const SHADER_HANDLER shader_arb_instruction_handler_table[WINED3DSIH_TABLE_SIZE] =
{
4986 4987 4988
    /* WINED3DSIH_ABS                              */ shader_hw_map2gl,
    /* WINED3DSIH_ADD                              */ shader_hw_map2gl,
    /* WINED3DSIH_AND                              */ NULL,
4989
    /* WINED3DSIH_ATOMIC_AND                       */ NULL,
4990
    /* WINED3DSIH_ATOMIC_CMP_STORE                 */ NULL,
4991
    /* WINED3DSIH_ATOMIC_IADD                      */ NULL,
4992
    /* WINED3DSIH_ATOMIC_IMAX                      */ NULL,
4993
    /* WINED3DSIH_ATOMIC_IMIN                      */ NULL,
4994
    /* WINED3DSIH_ATOMIC_OR                        */ NULL,
4995
    /* WINED3DSIH_ATOMIC_UMAX                      */ NULL,
4996
    /* WINED3DSIH_ATOMIC_UMIN                      */ NULL,
4997
    /* WINED3DSIH_ATOMIC_XOR                       */ NULL,
4998
    /* WINED3DSIH_BEM                              */ pshader_hw_bem,
4999
    /* WINED3DSIH_BFI                              */ NULL,
5000
    /* WINED3DSIH_BFREV                            */ NULL,
5001 5002 5003
    /* WINED3DSIH_BREAK                            */ shader_hw_break,
    /* WINED3DSIH_BREAKC                           */ shader_hw_breakc,
    /* WINED3DSIH_BREAKP                           */ NULL,
5004
    /* WINED3DSIH_BUFINFO                          */ NULL,
5005 5006
    /* WINED3DSIH_CALL                             */ shader_hw_call,
    /* WINED3DSIH_CALLNZ                           */ NULL,
5007
    /* WINED3DSIH_CASE                             */ NULL,
5008 5009
    /* WINED3DSIH_CMP                              */ pshader_hw_cmp,
    /* WINED3DSIH_CND                              */ pshader_hw_cnd,
5010
    /* WINED3DSIH_CONTINUE                         */ NULL,
5011
    /* WINED3DSIH_CONTINUEP                        */ NULL,
5012
    /* WINED3DSIH_COUNTBITS                        */ NULL,
5013 5014
    /* WINED3DSIH_CRS                              */ shader_hw_map2gl,
    /* WINED3DSIH_CUT                              */ NULL,
5015
    /* WINED3DSIH_CUT_STREAM                       */ NULL,
5016 5017
    /* WINED3DSIH_DCL                              */ shader_hw_nop,
    /* WINED3DSIH_DCL_CONSTANT_BUFFER              */ shader_hw_nop,
5018 5019
    /* WINED3DSIH_DCL_FUNCTION_BODY                */ NULL,
    /* WINED3DSIH_DCL_FUNCTION_TABLE               */ NULL,
5020
    /* WINED3DSIH_DCL_GLOBAL_FLAGS                 */ NULL,
5021
    /* WINED3DSIH_DCL_GS_INSTANCES                 */ NULL,
5022
    /* WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT */ NULL,
5023
    /* WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */ NULL,
5024
    /* WINED3DSIH_DCL_HS_MAX_TESSFACTOR            */ NULL,
5025
    /* WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER    */ NULL,
5026
    /* WINED3DSIH_DCL_INDEX_RANGE                  */ NULL,
5027
    /* WINED3DSIH_DCL_INDEXABLE_TEMP               */ NULL,
5028 5029 5030 5031 5032 5033 5034 5035
    /* WINED3DSIH_DCL_INPUT                        */ NULL,
    /* WINED3DSIH_DCL_INPUT_CONTROL_POINT_COUNT    */ NULL,
    /* WINED3DSIH_DCL_INPUT_PRIMITIVE              */ shader_hw_nop,
    /* WINED3DSIH_DCL_INPUT_PS                     */ NULL,
    /* WINED3DSIH_DCL_INPUT_PS_SGV                 */ NULL,
    /* WINED3DSIH_DCL_INPUT_PS_SIV                 */ NULL,
    /* WINED3DSIH_DCL_INPUT_SGV                    */ NULL,
    /* WINED3DSIH_DCL_INPUT_SIV                    */ NULL,
5036
    /* WINED3DSIH_DCL_INTERFACE                    */ NULL,
5037 5038 5039 5040
    /* WINED3DSIH_DCL_OUTPUT                       */ NULL,
    /* WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT   */ NULL,
    /* WINED3DSIH_DCL_OUTPUT_SIV                   */ NULL,
    /* WINED3DSIH_DCL_OUTPUT_TOPOLOGY              */ shader_hw_nop,
5041
    /* WINED3DSIH_DCL_RESOURCE_RAW                 */ NULL,
5042 5043
    /* WINED3DSIH_DCL_RESOURCE_STRUCTURED          */ NULL,
    /* WINED3DSIH_DCL_SAMPLER                      */ NULL,
5044
    /* WINED3DSIH_DCL_STREAM                       */ NULL,
5045
    /* WINED3DSIH_DCL_TEMPS                        */ NULL,
5046
    /* WINED3DSIH_DCL_TESSELLATOR_DOMAIN           */ NULL,
5047
    /* WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE */ NULL,
5048
    /* WINED3DSIH_DCL_TESSELLATOR_PARTITIONING     */ NULL,
5049
    /* WINED3DSIH_DCL_TGSM_RAW                     */ NULL,
5050
    /* WINED3DSIH_DCL_TGSM_STRUCTURED              */ NULL,
5051
    /* WINED3DSIH_DCL_THREAD_GROUP                 */ NULL,
5052
    /* WINED3DSIH_DCL_UAV_RAW                      */ NULL,
5053
    /* WINED3DSIH_DCL_UAV_STRUCTURED               */ NULL,
5054 5055 5056
    /* WINED3DSIH_DCL_UAV_TYPED                    */ NULL,
    /* WINED3DSIH_DCL_VERTICES_OUT                 */ shader_hw_nop,
    /* WINED3DSIH_DEF                              */ shader_hw_nop,
5057
    /* WINED3DSIH_DEFAULT                          */ NULL,
5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073
    /* WINED3DSIH_DEFB                             */ shader_hw_nop,
    /* WINED3DSIH_DEFI                             */ shader_hw_nop,
    /* WINED3DSIH_DIV                              */ NULL,
    /* WINED3DSIH_DP2                              */ NULL,
    /* WINED3DSIH_DP2ADD                           */ pshader_hw_dp2add,
    /* WINED3DSIH_DP3                              */ shader_hw_map2gl,
    /* WINED3DSIH_DP4                              */ shader_hw_map2gl,
    /* WINED3DSIH_DST                              */ shader_hw_map2gl,
    /* WINED3DSIH_DSX                              */ shader_hw_map2gl,
    /* WINED3DSIH_DSX_COARSE                       */ NULL,
    /* WINED3DSIH_DSX_FINE                         */ NULL,
    /* WINED3DSIH_DSY                              */ shader_hw_dsy,
    /* WINED3DSIH_DSY_COARSE                       */ NULL,
    /* WINED3DSIH_DSY_FINE                         */ NULL,
    /* WINED3DSIH_ELSE                             */ shader_hw_else,
    /* WINED3DSIH_EMIT                             */ NULL,
5074
    /* WINED3DSIH_EMIT_STREAM                      */ NULL,
5075 5076 5077
    /* WINED3DSIH_ENDIF                            */ shader_hw_endif,
    /* WINED3DSIH_ENDLOOP                          */ shader_hw_endloop,
    /* WINED3DSIH_ENDREP                           */ shader_hw_endrep,
5078
    /* WINED3DSIH_ENDSWITCH                        */ NULL,
5079
    /* WINED3DSIH_EQ                               */ NULL,
5080
    /* WINED3DSIH_EVAL_SAMPLE_INDEX                */ NULL,
5081 5082
    /* WINED3DSIH_EXP                              */ shader_hw_scalar_op,
    /* WINED3DSIH_EXPP                             */ shader_hw_scalar_op,
5083
    /* WINED3DSIH_F16TOF32                         */ NULL,
5084
    /* WINED3DSIH_F32TOF16                         */ NULL,
5085
    /* WINED3DSIH_FCALL                            */ NULL,
5086 5087 5088
    /* WINED3DSIH_FIRSTBIT_HI                      */ NULL,
    /* WINED3DSIH_FIRSTBIT_LO                      */ NULL,
    /* WINED3DSIH_FIRSTBIT_SHI                     */ NULL,
5089 5090 5091
    /* WINED3DSIH_FRC                              */ shader_hw_map2gl,
    /* WINED3DSIH_FTOI                             */ NULL,
    /* WINED3DSIH_FTOU                             */ NULL,
5092
    /* WINED3DSIH_GATHER4                          */ NULL,
5093
    /* WINED3DSIH_GATHER4_C                        */ NULL,
5094
    /* WINED3DSIH_GATHER4_PO                       */ NULL,
5095
    /* WINED3DSIH_GATHER4_PO_C                     */ NULL,
5096
    /* WINED3DSIH_GE                               */ NULL,
5097
    /* WINED3DSIH_HS_CONTROL_POINT_PHASE           */ NULL,
5098 5099
    /* WINED3DSIH_HS_DECLS                         */ NULL,
    /* WINED3DSIH_HS_FORK_PHASE                    */ NULL,
5100
    /* WINED3DSIH_HS_JOIN_PHASE                    */ NULL,
5101
    /* WINED3DSIH_IADD                             */ NULL,
5102
    /* WINED3DSIH_IBFE                             */ NULL,
5103 5104 5105 5106 5107 5108 5109 5110
    /* WINED3DSIH_IEQ                              */ NULL,
    /* WINED3DSIH_IF                               */ NULL /* Hardcoded into the shader */,
    /* WINED3DSIH_IFC                              */ shader_hw_ifc,
    /* WINED3DSIH_IGE                              */ NULL,
    /* WINED3DSIH_ILT                              */ NULL,
    /* WINED3DSIH_IMAD                             */ NULL,
    /* WINED3DSIH_IMAX                             */ NULL,
    /* WINED3DSIH_IMIN                             */ NULL,
5111
    /* WINED3DSIH_IMM_ATOMIC_ALLOC                 */ NULL,
5112
    /* WINED3DSIH_IMM_ATOMIC_AND                   */ NULL,
5113
    /* WINED3DSIH_IMM_ATOMIC_CMP_EXCH              */ NULL,
5114
    /* WINED3DSIH_IMM_ATOMIC_CONSUME               */ NULL,
5115
    /* WINED3DSIH_IMM_ATOMIC_EXCH                  */ NULL,
5116
    /* WINED3DSIH_IMM_ATOMIC_IADD                  */ NULL,
5117
    /* WINED3DSIH_IMM_ATOMIC_IMAX                  */ NULL,
5118
    /* WINED3DSIH_IMM_ATOMIC_IMIN                  */ NULL,
5119
    /* WINED3DSIH_IMM_ATOMIC_OR                    */ NULL,
5120
    /* WINED3DSIH_IMM_ATOMIC_UMAX                  */ NULL,
5121
    /* WINED3DSIH_IMM_ATOMIC_UMIN                  */ NULL,
5122
    /* WINED3DSIH_IMM_ATOMIC_XOR                   */ NULL,
5123 5124 5125 5126
    /* WINED3DSIH_IMUL                             */ NULL,
    /* WINED3DSIH_INE                              */ NULL,
    /* WINED3DSIH_INEG                             */ NULL,
    /* WINED3DSIH_ISHL                             */ NULL,
5127
    /* WINED3DSIH_ISHR                             */ NULL,
5128 5129 5130 5131
    /* WINED3DSIH_ITOF                             */ NULL,
    /* WINED3DSIH_LABEL                            */ shader_hw_label,
    /* WINED3DSIH_LD                               */ NULL,
    /* WINED3DSIH_LD2DMS                           */ NULL,
5132
    /* WINED3DSIH_LD_RAW                           */ NULL,
5133
    /* WINED3DSIH_LD_STRUCTURED                    */ NULL,
5134
    /* WINED3DSIH_LD_UAV_TYPED                     */ NULL,
5135
    /* WINED3DSIH_LIT                              */ shader_hw_map2gl,
5136
    /* WINED3DSIH_LOD                              */ NULL,
5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164
    /* WINED3DSIH_LOG                              */ shader_hw_scalar_op,
    /* WINED3DSIH_LOGP                             */ shader_hw_scalar_op,
    /* WINED3DSIH_LOOP                             */ shader_hw_loop,
    /* WINED3DSIH_LRP                              */ shader_hw_lrp,
    /* WINED3DSIH_LT                               */ NULL,
    /* WINED3DSIH_M3x2                             */ shader_hw_mnxn,
    /* WINED3DSIH_M3x3                             */ shader_hw_mnxn,
    /* WINED3DSIH_M3x4                             */ shader_hw_mnxn,
    /* WINED3DSIH_M4x3                             */ shader_hw_mnxn,
    /* WINED3DSIH_M4x4                             */ shader_hw_mnxn,
    /* WINED3DSIH_MAD                              */ shader_hw_map2gl,
    /* WINED3DSIH_MAX                              */ shader_hw_map2gl,
    /* WINED3DSIH_MIN                              */ shader_hw_map2gl,
    /* WINED3DSIH_MOV                              */ shader_hw_mov,
    /* WINED3DSIH_MOVA                             */ shader_hw_mov,
    /* WINED3DSIH_MOVC                             */ NULL,
    /* WINED3DSIH_MUL                              */ shader_hw_map2gl,
    /* WINED3DSIH_NE                               */ NULL,
    /* WINED3DSIH_NOP                              */ shader_hw_nop,
    /* WINED3DSIH_NOT                              */ NULL,
    /* WINED3DSIH_NRM                              */ shader_hw_nrm,
    /* WINED3DSIH_OR                               */ NULL,
    /* WINED3DSIH_PHASE                            */ shader_hw_nop,
    /* WINED3DSIH_POW                              */ shader_hw_pow,
    /* WINED3DSIH_RCP                              */ shader_hw_scalar_op,
    /* WINED3DSIH_REP                              */ shader_hw_rep,
    /* WINED3DSIH_RESINFO                          */ NULL,
    /* WINED3DSIH_RET                              */ shader_hw_ret,
5165
    /* WINED3DSIH_RETP                             */ NULL,
5166
    /* WINED3DSIH_ROUND_NE                         */ NULL,
5167 5168 5169 5170 5171 5172 5173 5174 5175
    /* WINED3DSIH_ROUND_NI                         */ NULL,
    /* WINED3DSIH_ROUND_PI                         */ NULL,
    /* WINED3DSIH_ROUND_Z                          */ NULL,
    /* WINED3DSIH_RSQ                              */ shader_hw_scalar_op,
    /* WINED3DSIH_SAMPLE                           */ NULL,
    /* WINED3DSIH_SAMPLE_B                         */ NULL,
    /* WINED3DSIH_SAMPLE_C                         */ NULL,
    /* WINED3DSIH_SAMPLE_C_LZ                      */ NULL,
    /* WINED3DSIH_SAMPLE_GRAD                      */ NULL,
5176
    /* WINED3DSIH_SAMPLE_INFO                      */ NULL,
5177
    /* WINED3DSIH_SAMPLE_LOD                       */ NULL,
5178
    /* WINED3DSIH_SAMPLE_POS                       */ NULL,
5179 5180 5181 5182 5183 5184
    /* WINED3DSIH_SETP                             */ NULL,
    /* WINED3DSIH_SGE                              */ shader_hw_map2gl,
    /* WINED3DSIH_SGN                              */ shader_hw_sgn,
    /* WINED3DSIH_SINCOS                           */ shader_hw_sincos,
    /* WINED3DSIH_SLT                              */ shader_hw_map2gl,
    /* WINED3DSIH_SQRT                             */ NULL,
5185
    /* WINED3DSIH_STORE_RAW                        */ NULL,
5186
    /* WINED3DSIH_STORE_STRUCTURED                 */ NULL,
5187 5188
    /* WINED3DSIH_STORE_UAV_TYPED                  */ NULL,
    /* WINED3DSIH_SUB                              */ shader_hw_map2gl,
5189
    /* WINED3DSIH_SWAPC                            */ NULL,
5190
    /* WINED3DSIH_SWITCH                           */ NULL,
5191
    /* WINED3DSIH_SYNC                             */ NULL,
5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213
    /* WINED3DSIH_TEX                              */ pshader_hw_tex,
    /* WINED3DSIH_TEXBEM                           */ pshader_hw_texbem,
    /* WINED3DSIH_TEXBEML                          */ pshader_hw_texbem,
    /* WINED3DSIH_TEXCOORD                         */ pshader_hw_texcoord,
    /* WINED3DSIH_TEXDEPTH                         */ pshader_hw_texdepth,
    /* WINED3DSIH_TEXDP3                           */ pshader_hw_texdp3,
    /* WINED3DSIH_TEXDP3TEX                        */ pshader_hw_texdp3tex,
    /* WINED3DSIH_TEXKILL                          */ pshader_hw_texkill,
    /* WINED3DSIH_TEXLDD                           */ shader_hw_texldd,
    /* WINED3DSIH_TEXLDL                           */ shader_hw_texldl,
    /* WINED3DSIH_TEXM3x2DEPTH                     */ pshader_hw_texm3x2depth,
    /* WINED3DSIH_TEXM3x2PAD                       */ pshader_hw_texm3x2pad,
    /* WINED3DSIH_TEXM3x2TEX                       */ pshader_hw_texm3x2tex,
    /* WINED3DSIH_TEXM3x3                          */ pshader_hw_texm3x3,
    /* WINED3DSIH_TEXM3x3DIFF                      */ NULL,
    /* WINED3DSIH_TEXM3x3PAD                       */ pshader_hw_texm3x3pad,
    /* WINED3DSIH_TEXM3x3SPEC                      */ pshader_hw_texm3x3spec,
    /* WINED3DSIH_TEXM3x3TEX                       */ pshader_hw_texm3x3tex,
    /* WINED3DSIH_TEXM3x3VSPEC                     */ pshader_hw_texm3x3vspec,
    /* WINED3DSIH_TEXREG2AR                        */ pshader_hw_texreg2ar,
    /* WINED3DSIH_TEXREG2GB                        */ pshader_hw_texreg2gb,
    /* WINED3DSIH_TEXREG2RGB                       */ pshader_hw_texreg2rgb,
5214
    /* WINED3DSIH_UBFE                             */ NULL,
5215 5216
    /* WINED3DSIH_UDIV                             */ NULL,
    /* WINED3DSIH_UGE                              */ NULL,
5217
    /* WINED3DSIH_ULT                              */ NULL,
5218
    /* WINED3DSIH_UMAX                             */ NULL,
5219
    /* WINED3DSIH_UMIN                             */ NULL,
5220
    /* WINED3DSIH_UMUL                             */ NULL,
5221 5222 5223
    /* WINED3DSIH_USHR                             */ NULL,
    /* WINED3DSIH_UTOF                             */ NULL,
    /* WINED3DSIH_XOR                              */ NULL,
5224 5225
};

5226
/* Resolve the value of boolean constant "idx" at compile time.
 *
 * A constant defined locally by the shader is looked up in
 * shader->constantsB. Any other constant is read from the bitmask stored in
 * the compile args of the shader type currently being compiled.
 *
 * Returns a non-zero value if the constant is true and zero if it is false.
 * If the register map claims a local definition that cannot be found, an
 * error is logged and FALSE is returned. */
static BOOL get_bool_const(const struct wined3d_shader_instruction *ins,
        const struct wined3d_shader *shader, DWORD idx)
{
    const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
    const struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
    const struct wined3d_shader_lconst *lconst;
    const WORD mask = (1u << idx);
    WORD app_bools;

    if (!(reg_maps->local_bool_consts & mask))
    {
        /* Not defined in the shader, so the value comes from the compile args. */
        if (shader_is_vshader_version(reg_maps->shader_version.type))
            app_bools = priv->cur_vs_args->clip.boolclip.bools;
        else
            app_bools = priv->cur_ps_args->bools;

        return app_bools & mask;
    }

    /* What good is an if(bool) with a hardcoded local constant? I don't know, but handle it. */
    LIST_FOR_EACH_ENTRY(lconst, &shader->constantsB, struct wined3d_shader_lconst, entry)
    {
        if (lconst->idx == idx)
            return lconst->value[0];
    }

    ERR("Local constant not found\n");
    return FALSE;
}

5257
/* Fetch the loop control values of integer constant "idx" into *loop_control.
 *
 * The three components are the iteration count, the start value of aL and
 * the per-iteration step of aL. Count and start are unsigned, the step is
 * signed.
 *
 * A constant defined locally by the shader is taken from shader->constantsI.
 * Otherwise the values come from the loop_ctrl array in the compile args of
 * the shader type being compiled. If no value can be determined (missing
 * local definition, unhandled shader type) a message is logged and
 * *loop_control is set to all zeroes, so the caller never sees stale data. */
static void get_loop_control_const(const struct wined3d_shader_instruction *ins,
        const struct wined3d_shader *shader, UINT idx, struct wined3d_shader_loop_control *loop_control)
{
    const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;

    /* Integer constants can either be a local constant, or they can be stored in the shader
     * type specific compile args. */
    if (reg_maps->local_int_consts & (1u << idx))
    {
        const struct wined3d_shader_lconst *constant;

        LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry)
        {
            if (constant->idx == idx)
            {
                loop_control->count = constant->value[0];
                loop_control->start = constant->value[1];
                /* Step is signed. */
                loop_control->step = (int)constant->value[2];
                return;
            }
        }
        /* If this happens the flag was set incorrectly */
        ERR("Local constant not found\n");
        loop_control->count = 0;
        loop_control->start = 0;
        loop_control->step = 0;
        return;
    }

    switch (reg_maps->shader_version.type)
    {
        case WINED3D_SHADER_TYPE_VERTEX:
            /* Count and aL start value are unsigned */
            loop_control->count = priv->cur_vs_args->loop_ctrl[idx][0];
            loop_control->start = priv->cur_vs_args->loop_ctrl[idx][1];
            /* Step is signed. Plain char may be unsigned depending on the
             * ABI, which would turn negative steps into large positive
             * ones, so ask for a signed type explicitly. */
            loop_control->step = (signed char)priv->cur_vs_args->loop_ctrl[idx][2];
            break;

        case WINED3D_SHADER_TYPE_PIXEL:
            loop_control->count = priv->cur_ps_args->loop_ctrl[idx][0];
            loop_control->start = priv->cur_ps_args->loop_ctrl[idx][1];
            /* Signed for the same reason as in the vertex shader case. */
            loop_control->step = (signed char)priv->cur_ps_args->loop_ctrl[idx][2];
            break;

        default:
            FIXME("Unhandled shader type %#x.\n", reg_maps->shader_version.type);
            /* Behave like the "local constant not found" path instead of
             * leaving the output untouched. */
            loop_control->count = 0;
            loop_control->start = 0;
            loop_control->step = 0;
            break;
    }
}

static void record_instruction(struct list *list, const struct wined3d_shader_instruction *ins)
{
5312
    struct wined3d_shader_src_param *src_param = NULL, *rel_addr;
5313 5314 5315 5316 5317
    struct wined3d_shader_dst_param *dst_param;
    struct recorded_instruction *rec;
    unsigned int i;

    if (!(rec = heap_alloc_zero(sizeof(*rec))))
5318 5319 5320 5321 5322 5323
    {
        ERR("Out of memory\n");
        return;
    }

    rec->ins = *ins;
5324 5325
    if (!(dst_param = heap_alloc(sizeof(*dst_param))))
        goto free;
5326
    *dst_param = *ins->dst;
5327
    if (ins->dst->reg.idx[0].rel_addr)
5328
    {
5329
        if (!(rel_addr = heap_alloc(sizeof(*rel_addr))))
5330 5331 5332
            goto free;
        *rel_addr = *ins->dst->reg.idx[0].rel_addr;
        dst_param->reg.idx[0].rel_addr = rel_addr;
5333 5334 5335
    }
    rec->ins.dst = dst_param;

5336
    if (!(src_param = heap_calloc(ins->src_count, sizeof(*src_param))))
5337 5338
        goto free;
    for (i = 0; i < ins->src_count; ++i)
5339 5340
    {
        src_param[i] = ins->src[i];
5341
        if (ins->src[i].reg.idx[0].rel_addr)
5342
        {
5343
            if (!(rel_addr = heap_alloc(sizeof(*rel_addr))))
5344 5345 5346
                goto free;
            *rel_addr = *ins->src[i].reg.idx[0].rel_addr;
            src_param[i].reg.idx[0].rel_addr = rel_addr;
5347 5348 5349 5350 5351 5352 5353 5354
        }
    }
    rec->ins.src = src_param;
    list_add_tail(list, &rec->entry);
    return;

free:
    ERR("Out of memory\n");
5355
    if (dst_param)
5356
    {
5357 5358
        heap_free((void *)dst_param->reg.idx[0].rel_addr);
        heap_free(dst_param);
5359
    }
5360
    if (src_param)
5361
    {
5362
        for (i = 0; i < ins->src_count; ++i)
5363
        {
5364
            heap_free((void *)src_param[i].reg.idx[0].rel_addr);
5365
        }
5366
        heap_free(src_param);
5367
    }
5368
    heap_free(rec);
5369 5370 5371 5372 5373 5374 5375 5376 5377 5378
}

/* Release every instruction recorded in "list".
 *
 * Each entry is unlinked and freed together with its destination parameter,
 * its source parameter array and the relative addressing parameters stored
 * in reg.idx[0] of those parameters. */
static void free_recorded_instruction(struct list *list)
{
    struct recorded_instruction *cur, *next;
    unsigned int src_idx;

    LIST_FOR_EACH_ENTRY_SAFE(cur, next, list, struct recorded_instruction, entry)
    {
        const struct wined3d_shader_dst_param *dst = cur->ins.dst;
        const struct wined3d_shader_src_param *src = cur->ins.src;

        list_remove(&cur->entry);

        if (dst)
        {
            heap_free((void *)dst->reg.idx[0].rel_addr);
            heap_free((void *)dst);
        }

        if (src)
        {
            for (src_idx = 0; src_idx < cur->ins.src_count; ++src_idx)
                heap_free((void *)src[src_idx].reg.idx[0].rel_addr);
            heap_free((void *)src);
        }

        heap_free(cur);
    }
}

5396 5397 5398 5399 5400 5401 5402 5403 5404 5405
static void pop_control_frame(const struct wined3d_shader_instruction *ins)
{
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
    struct control_frame *control_frame;

    if (ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP)
    {
        struct list *e = list_head(&priv->control_frames);
        control_frame = LIST_ENTRY(e, struct control_frame, entry);
        list_remove(&control_frame->entry);
5406
        heap_free(control_frame);
5407 5408 5409 5410 5411 5412 5413 5414
        priv->loop_depth--;
    }
    else if (ins->handler_idx == WINED3DSIH_ENDIF)
    {
        /* Non-ifc ENDIFs were already handled previously. */
        struct list *e = list_head(&priv->control_frames);
        control_frame = LIST_ENTRY(e, struct control_frame, entry);
        list_remove(&control_frame->entry);
5415
        heap_free(control_frame);
5416 5417 5418
    }
}

5419 5420
/* Per-instruction entry point of the ARB backend.
 *
 * Apart from looking up the code generator for ins->handler_idx in
 * shader_arb_instruction_handler_table, this function keeps track of control
 * flow through the priv->control_frames list:
 *
 * - LOOP / REP push a control frame. When priv->target_version >= NV2 the
 *   instruction continues to its handler. Otherwise the outermost loop starts
 *   "recording": the instructions of the loop body are stored with
 *   record_instruction() instead of being translated, and when the matching
 *   ENDLOOP / ENDREP arrives they are replayed loop_control.count times by
 *   calling this function recursively (i.e. the loop is unrolled).
 * - IF on a boolean constant is evaluated right here with get_bool_const().
 *   The branch that is not taken is "muted": while priv->muted is set no
 *   instruction reaches its handler.
 * - IFC pushes a frame and, like the ELSE / ENDIF belonging to it, is passed
 *   on to the handler from the table.
 *
 * After the handler ran, pop_control_frame() removes the frame of a closing
 * instruction and shader_arb_add_instruction_modifiers() appends the code for
 * a destination shift. */
static void shader_arb_handle_instruction(const struct wined3d_shader_instruction *ins) {
    SHADER_HANDLER hw_fct;
5421
    struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
5422
    const struct wined3d_shader *shader = ins->ctx->shader;
5423
    struct control_frame *control_frame;
5424
    struct wined3d_string_buffer *buffer = ins->ctx->buffer;
5425
    BOOL bool_const;
5426

5427 5428
    if(ins->handler_idx == WINED3DSIH_LOOP || ins->handler_idx == WINED3DSIH_REP)
    {
5429
        /* NOTE(review): the result of heap_alloc_zero() is used without a NULL check. */
        control_frame = heap_alloc_zero(sizeof(*control_frame));
5430 5431
        list_add_head(&priv->control_frames, &control_frame->entry);

5432 5433 5434
        if(ins->handler_idx == WINED3DSIH_LOOP) control_frame->type = LOOP;
        if(ins->handler_idx == WINED3DSIH_REP) control_frame->type = REP;

5435 5436
        if(priv->target_version >= NV2)
        {
5437
            /* Give the loop a number of its own and let the handler emit it. */
            control_frame->no.loop = priv->num_loops++;
5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452
            priv->loop_depth++;
        }
        else
        {
            /* Don't bother recording when we're in a not used if branch */
            if(priv->muted)
            {
                return;
            }

            if(!priv->recording)
            {
                /* Outermost loop: start recording its body. The loop
                 * instruction itself is not recorded, its parameters are
                 * kept in the control frame instead. */
                list_init(&priv->record);
                priv->recording = TRUE;
                control_frame->outer_loop = TRUE;
5453
                get_loop_control_const(ins, shader, ins->src[0].reg.idx[0].offset, &control_frame->loop_control);
5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472
                return; /* Instruction is handled */
            }
            /* Record this loop in the outer loop's recording */
        }
    }
    else if(ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP)
    {
        if(priv->target_version >= NV2)
        {
            /* Nothing to do. The control frame is popped after the HW instr handler */
        }
        else
        {
            struct list *e = list_head(&priv->control_frames);
            control_frame = LIST_ENTRY(e, struct control_frame, entry);
            list_remove(&control_frame->entry);

            if(control_frame->outer_loop)
            {
5473 5474
                unsigned int iteration;
                int aL = 0;
5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489
                struct list copy;

                /* Turn off recording before playback */
                priv->recording = FALSE;

                /* Move the recorded instructions to a separate list and get them out of the private data
                 * structure. If there are nested loops, the shader_arb_handle_instruction below will
                 * be recorded again, thus priv->record might be overwritten
                 */
                list_init(&copy);
                list_move_tail(&copy, &priv->record);
                list_init(&priv->record);

                if(ins->handler_idx == WINED3DSIH_ENDLOOP)
                {
5490 5491 5492 5493
                    shader_addline(buffer, "#unrolling loop: %u iterations, aL=%u, inc %d\n",
                                   control_frame->loop_control.count, control_frame->loop_control.start,
                                   control_frame->loop_control.step);
                    aL = control_frame->loop_control.start;
5494 5495 5496
                }
                else
                {
5497
                    shader_addline(buffer, "#unrolling rep: %u iterations\n", control_frame->loop_control.count);
5498 5499
                }

5500
                /* Replay the recorded body once per iteration. */
                for (iteration = 0; iteration < control_frame->loop_control.count; ++iteration)
5501 5502 5503 5504 5505
                {
                    struct recorded_instruction *rec_ins;
                    if(ins->handler_idx == WINED3DSIH_ENDLOOP)
                    {
                        /* Publish the current aL value in priv->aL for the replayed instructions. */
                        priv->aL = aL;
5506
                        shader_addline(buffer, "#Iteration %u, aL=%d\n", iteration, aL);
5507 5508 5509
                    }
                    else
                    {
5510
                        shader_addline(buffer, "#Iteration %u\n", iteration);
5511 5512 5513 5514 5515 5516 5517 5518 5519
                    }

                    LIST_FOR_EACH_ENTRY(rec_ins, &copy, struct recorded_instruction, entry)
                    {
                        shader_arb_handle_instruction(&rec_ins->ins);
                    }

                    if(ins->handler_idx == WINED3DSIH_ENDLOOP)
                    {
5520
                        aL += control_frame->loop_control.step;
5521 5522 5523 5524 5525
                    }
                }
                shader_addline(buffer, "#end loop/rep\n");

                free_recorded_instruction(&copy);
5526
                heap_free(control_frame);
5527 5528 5529 5530 5531
                return; /* Instruction is handled */
            }
            else
            {
                /* This is a nested loop. Proceed to the normal recording function */
5532
                heap_free(control_frame);
5533 5534 5535 5536 5537 5538 5539 5540 5541 5542
            }
        }
    }

    /* While a loop body is being recorded nothing is translated. */
    if(priv->recording)
    {
        record_instruction(&priv->record, ins);
        return;
    }

5543 5544 5545
    /* boolean if */
    if(ins->handler_idx == WINED3DSIH_IF)
    {
5546
        /* NOTE(review): the result of heap_alloc_zero() is used without a NULL check. */
        control_frame = heap_alloc_zero(sizeof(*control_frame));
5547
        list_add_head(&priv->control_frames, &control_frame->entry);
5548
        control_frame->type = IF;
5549

5550 5551 5552
        bool_const = get_bool_const(ins, shader, ins->src[0].reg.idx[0].offset);
        if (ins->src[0].modifiers == WINED3DSPSM_NOT)
            bool_const = !bool_const;
5553
        /* Only the frame that turned muting on (control_frame->muting) is
         * allowed to turn it off again in ELSE / ENDIF below. */
        if (!priv->muted && !bool_const)
5554 5555 5556
        {
            shader_addline(buffer, "#if(FALSE){\n");
            priv->muted = TRUE;
5557
            control_frame->muting = TRUE;
5558 5559 5560 5561 5562 5563 5564 5565
        }
        else shader_addline(buffer, "#if(TRUE) {\n");

        return; /* Instruction is handled */
    }
    else if(ins->handler_idx == WINED3DSIH_IFC)
    {
        /* IF(bool) and if_cond(a, b) use the same ELSE and ENDIF tokens */
5566
        /* NOTE(review): the result of heap_alloc_zero() is used without a NULL check. */
        control_frame = heap_alloc_zero(sizeof(*control_frame));
5567
        control_frame->type = IFC;
5568
        control_frame->no.ifc = priv->num_ifcs++;
5569
        list_add_head(&priv->control_frames, &control_frame->entry);
5570 5571 5572
    }
    else if(ins->handler_idx == WINED3DSIH_ELSE)
    {
5573 5574
        struct list *e = list_head(&priv->control_frames);
        control_frame = LIST_ENTRY(e, struct control_frame, entry);
5575

5576
        if(control_frame->type == IF)
5577 5578
        {
            shader_addline(buffer, "#} else {\n");
5579
            /* The "if" branch was emitted, so mute the "else" branch, or the
             * other way round if this frame muted the "if" branch. */
            if(!priv->muted && !control_frame->muting)
5580 5581
            {
                priv->muted = TRUE;
5582
                control_frame->muting = TRUE;
5583
            }
5584
            else if(control_frame->muting) priv->muted = FALSE;
5585 5586 5587
            return; /* Instruction is handled. */
        }
        /* In case of an ifc, generate a HW shader instruction */
5588 5589
        if (control_frame->type != IFC)
            ERR("Control frame does not match.\n");
5590 5591 5592
    }
    else if(ins->handler_idx == WINED3DSIH_ENDIF)
    {
5593 5594
        struct list *e = list_head(&priv->control_frames);
        control_frame = LIST_ENTRY(e, struct control_frame, entry);
5595

5596
        if(control_frame->type == IF)
5597 5598
        {
            shader_addline(buffer, "#} endif\n");
5599 5600
            if(control_frame->muting) priv->muted = FALSE;
            list_remove(&control_frame->entry);
5601
            heap_free(control_frame);
5602 5603
            return; /* Instruction is handled */
        }
5604 5605 5606
        /* In case of an ifc, generate a HW shader instruction */
        if (control_frame->type != IFC)
            ERR("Control frame does not match.\n");
5607 5608
    }

5609 5610 5611 5612 5613
    /* Inside a muted branch no code is generated, but the frame list still
     * has to stay balanced.
     * NOTE(review): when target_version < NV2, an ENDLOOP / ENDREP that gets
     * here has already had its frame removed above, so pop_control_frame()
     * would seem to remove one more frame - confirm whether that path can
     * be reached. */
    if(priv->muted)
    {
        pop_control_frame(ins);
        return;
    }
5614 5615 5616 5617 5618 5619 5620

    /* Select handler */
    hw_fct = shader_arb_instruction_handler_table[ins->handler_idx];

    /* Unhandled opcode */
    if (!hw_fct)
    {
5621
        FIXME("Backend can't handle opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
5622 5623 5624 5625
        return;
    }
    hw_fct(ins);

5626
    pop_control_frame(ins);
5627

5628 5629 5630
    shader_arb_add_instruction_modifiers(ins);
}

5631 5632 5633 5634
static BOOL shader_arb_has_ffp_proj_control(void *shader_priv)
{
    struct shader_arb_priv *priv = shader_priv;

5635
    return priv->ffp_proj_control;
5636 5637
}

5638 5639
/* Shader backend precompile callback. The ARB backend does nothing here;
 * both arguments are ignored. */
static void shader_arb_precompile(void *shader_priv, struct wined3d_shader *shader)
{
}

5640 5641
const struct wined3d_shader_backend_ops arb_program_shader_backend =
{
5642
    shader_arb_handle_instruction,
5643
    shader_arb_precompile,
5644
    shader_arb_select,
5645
    shader_arb_select_compute,
5646
    shader_arb_disable,
5647 5648
    shader_arb_update_float_vertex_constants,
    shader_arb_update_float_pixel_constants,
5649 5650 5651 5652
    shader_arb_load_constants,
    shader_arb_destroy,
    shader_arb_alloc,
    shader_arb_free,
5653 5654
    shader_arb_allocate_context_data,
    shader_arb_free_context_data,
5655
    shader_arb_init_context_state,
5656
    shader_arb_get_caps,
5657
    shader_arb_color_fixup_supported,
5658
    shader_arb_has_ffp_proj_control,
5659
};
5660 5661

/* ARB_fragment_program fixed function pipeline replacement definitions.
 *
 * These are indices into the fragment program environment parameter array
 * (program.env[]). The scalar slots come first, followed by three arrays
 * indexed by texture stage. Each array reserves eight consecutive slots
 * (stages 0..7), which is why the start of the next array is derived from
 * element 7 of the previous one.
 *
 * The macro argument is parenthesized so that expressions such as
 * "a ? b : c" or "x << 1" can be passed without changing the result. */
#define ARB_FFP_CONST_TFACTOR           0
#define ARB_FFP_CONST_COLOR_KEY_LOW     ((ARB_FFP_CONST_TFACTOR) + 1)
#define ARB_FFP_CONST_COLOR_KEY_HIGH    ((ARB_FFP_CONST_COLOR_KEY_LOW) + 1)
#define ARB_FFP_CONST_SPECULAR_ENABLE   ((ARB_FFP_CONST_COLOR_KEY_HIGH) + 1)
#define ARB_FFP_CONST_CONSTANT(i)       ((ARB_FFP_CONST_SPECULAR_ENABLE) + 1 + (i))
#define ARB_FFP_CONST_BUMPMAT(i)        ((ARB_FFP_CONST_CONSTANT(7)) + 1 + (i))
#define ARB_FFP_CONST_LUMINANCE(i)      ((ARB_FFP_CONST_BUMPMAT(7)) + 1 + (i))
5669 5670 5671

struct arbfp_ffp_desc
{
5672
    struct ffp_frag_desc parent;
5673 5674 5675
    GLuint shader;
};

5676
/* Context activation is done by the caller. */
5677
static void arbfp_enable(const struct wined3d_context *context, BOOL enable)
5678
{
5679
    const struct wined3d_gl_info *gl_info = wined3d_context_gl_const(context)->gl_info;
5680

5681 5682 5683
    if (enable)
    {
        gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB);
5684
        checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)");
5685 5686 5687 5688
    }
    else
    {
        gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB);
5689 5690 5691 5692
        checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
    }
}

5693
static void *arbfp_alloc(const struct wined3d_shader_backend_ops *shader_backend, void *shader_priv)
5694
{
5695
    struct shader_arb_priv *priv;
5696 5697 5698 5699 5700 5701 5702

    /* Share private data between the shader backend and the pipeline
     * replacement, if both are the arb implementation. This is needed to
     * figure out whether ARBfp should be disabled if no pixel shader is bound
     * or not. */
    if (shader_backend == &arb_program_shader_backend)
        priv = shader_priv;
5703
    else if (!(priv = heap_alloc_zero(sizeof(*priv))))
5704 5705
        return NULL;

5706
    wine_rb_init(&priv->fragment_shaders, wined3d_ffp_frag_program_key_compare);
5707
    priv->use_arbfp_fixed_func = TRUE;
5708 5709

    return priv;
5710 5711
}

5712
/* Context activation is done by the caller. */
5713
static void arbfp_free_ffpshader(struct wine_rb_entry *entry, void *param)
5714 5715
{
    struct arbfp_ffp_desc *entry_arb = WINE_RB_ENTRY_VALUE(entry, struct arbfp_ffp_desc, parent.entry);
5716
    struct wined3d_context_gl *context_gl = param;
5717
    const struct wined3d_gl_info *gl_info;
5718

5719
    gl_info = context_gl->gl_info;
5720
    GL_EXTCALL(glDeleteProgramsARB(1, &entry_arb->shader));
5721
    checkGLcall("delete ffp program");
5722
    heap_free(entry_arb);
5723 5724
}

5725
/* Context activation is done by the caller. */
5726
static void arbfp_free(struct wined3d_device *device, struct wined3d_context *context)
5727
{
5728
    struct wined3d_context_gl *context_gl = wined3d_context_gl(context);
5729
    struct shader_arb_priv *priv = device->fragment_priv;
5730

5731
    wine_rb_destroy(&priv->fragment_shaders, arbfp_free_ffpshader, context_gl);
5732 5733
    priv->use_arbfp_fixed_func = FALSE;

5734
    if (device->shader_backend != &arb_program_shader_backend)
5735
        heap_free(device->fragment_priv);
5736 5737
}

5738
static void arbfp_get_caps(const struct wined3d_adapter *adapter, struct fragment_caps *caps)
5739
{
5740 5741
    const struct wined3d_gl_info *gl_info = &adapter->gl_info;

5742
    caps->wined3d_caps = WINED3D_FRAGMENT_CAP_PROJ_CONTROL
5743 5744
            | WINED3D_FRAGMENT_CAP_SRGB_WRITE
            | WINED3D_FRAGMENT_CAP_COLOR_KEY;
5745
    caps->PrimitiveMiscCaps = WINED3DPMISCCAPS_TSSARGTEMP;
5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768
    caps->TextureOpCaps =  WINED3DTEXOPCAPS_DISABLE                     |
                           WINED3DTEXOPCAPS_SELECTARG1                  |
                           WINED3DTEXOPCAPS_SELECTARG2                  |
                           WINED3DTEXOPCAPS_MODULATE4X                  |
                           WINED3DTEXOPCAPS_MODULATE2X                  |
                           WINED3DTEXOPCAPS_MODULATE                    |
                           WINED3DTEXOPCAPS_ADDSIGNED2X                 |
                           WINED3DTEXOPCAPS_ADDSIGNED                   |
                           WINED3DTEXOPCAPS_ADD                         |
                           WINED3DTEXOPCAPS_SUBTRACT                    |
                           WINED3DTEXOPCAPS_ADDSMOOTH                   |
                           WINED3DTEXOPCAPS_BLENDCURRENTALPHA           |
                           WINED3DTEXOPCAPS_BLENDFACTORALPHA            |
                           WINED3DTEXOPCAPS_BLENDTEXTUREALPHA           |
                           WINED3DTEXOPCAPS_BLENDDIFFUSEALPHA           |
                           WINED3DTEXOPCAPS_BLENDTEXTUREALPHAPM         |
                           WINED3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR      |
                           WINED3DTEXOPCAPS_MODULATECOLOR_ADDALPHA      |
                           WINED3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA   |
                           WINED3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR   |
                           WINED3DTEXOPCAPS_DOTPRODUCT3                 |
                           WINED3DTEXOPCAPS_MULTIPLYADD                 |
                           WINED3DTEXOPCAPS_LERP                        |
5769 5770
                           WINED3DTEXOPCAPS_BUMPENVMAP                  |
                           WINED3DTEXOPCAPS_BUMPENVMAPLUMINANCE;
5771

5772
    /* TODO: Implement WINED3DTEXOPCAPS_PREMODULATE */
5773

5774 5775
    caps->MaxTextureBlendStages   = WINED3D_MAX_TEXTURES;
    caps->MaxSimultaneousTextures = min(gl_info->limits.samplers[WINED3D_SHADER_TYPE_PIXEL], WINED3D_MAX_TEXTURES);
5776 5777
}

5778 5779 5780 5781 5782
/* Return the GL_EXT_EMUL_* flags reported by the ARBfp fragment pipeline.
 * The result does not depend on "gl_info". */
static DWORD arbfp_get_emul_mask(const struct wined3d_gl_info *gl_info)
{
    const DWORD emul_mask = GL_EXT_EMUL_ARB_MULTITEXTURE | GL_EXT_EMUL_EXT_FOG_COORD;

    return emul_mask;
}

5783 5784
/* State handler: load WINED3D_RS_TEXTUREFACTOR into the fragment program
 * environment parameter ARB_FFP_CONST_TFACTOR. */
static void state_texfactor_arbfp(struct wined3d_context *context,
        const struct wined3d_state *state, DWORD state_id)
{
    const struct wined3d_gl_info *gl_info = wined3d_context_gl(context)->gl_info;
    struct wined3d_device *device = context->device;
    struct wined3d_color color;

    if (device->shader_backend == &arb_program_shader_backend)
    {
        struct shader_arb_priv *arb_priv = device->shader_priv;

        /* With an arbfp pixel shader in use the parameter holds an
         * application provided constant; loading ours would overwrite it. */
        if (use_ps(state))
            return;

        /* Record that the slot no longer holds a pixel shader constant. */
        arb_priv->pshader_const_dirty[ARB_FFP_CONST_TFACTOR] = 1;
        arb_priv->highest_dirty_ps_const = max(arb_priv->highest_dirty_ps_const, ARB_FFP_CONST_TFACTOR + 1);
    }

    wined3d_color_from_d3dcolor(&color, state->render_states[WINED3D_RS_TEXTUREFACTOR]);
    GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, &color.r));
    checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, &color.r)");
}

5810 5811 5812 5813
/* State handler: load the WINED3D_TSS_CONSTANT colour of a texture stage
 * into the matching ARB_FFP_CONST_CONSTANT() environment parameter. The
 * stage is derived from "state_id". */
static void state_tss_constant_arbfp(struct wined3d_context *context,
        const struct wined3d_state *state, DWORD state_id)
{
    const DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1);
    const unsigned int env_index = ARB_FFP_CONST_CONSTANT(stage);
    const struct wined3d_gl_info *gl_info = wined3d_context_gl(context)->gl_info;
    struct wined3d_device *device = context->device;
    struct wined3d_color color;

    if (device->shader_backend == &arb_program_shader_backend)
    {
        struct shader_arb_priv *arb_priv = device->shader_priv;

        /* With an arbfp pixel shader in use the parameter holds an
         * application provided constant; loading ours would overwrite it. */
        if (use_ps(state))
            return;

        arb_priv->pshader_const_dirty[env_index] = 1;
        arb_priv->highest_dirty_ps_const = max(arb_priv->highest_dirty_ps_const, env_index + 1);
    }

    wined3d_color_from_d3dcolor(&color, state->texture_states[stage][WINED3D_TSS_CONSTANT]);
    GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, env_index, &color.r));
    checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_CONSTANT(stage), &color.r)");
}

5839 5840
/* State handler: load the specular enable factor into the environment
 * parameter ARB_FFP_CONST_SPECULAR_ENABLE. The value is (1, 1, 1, 0) when
 * WINED3D_RS_SPECULARENABLE is set and (0, 0, 0, 0) otherwise. */
static void state_arb_specularenable(struct wined3d_context *context,
        const struct wined3d_state *state, DWORD state_id)
{
    const struct wined3d_gl_info *gl_info = wined3d_context_gl(context)->gl_info;
    struct wined3d_device *device = context->device;
    float col[4];
    float rgb;

    if (device->shader_backend == &arb_program_shader_backend)
    {
        struct shader_arb_priv *arb_priv = device->shader_priv;

        /* With an arbfp pixel shader in use the parameter holds an
         * application provided constant; loading ours would overwrite it. */
        if (use_ps(state))
            return;

        arb_priv->pshader_const_dirty[ARB_FFP_CONST_SPECULAR_ENABLE] = 1;
        arb_priv->highest_dirty_ps_const = max(arb_priv->highest_dirty_ps_const,
                ARB_FFP_CONST_SPECULAR_ENABLE + 1);
    }

    rgb = state->render_states[WINED3D_RS_SPECULARENABLE] ? 1.0f : 0.0f;
    col[0] = rgb;
    col[1] = rgb;
    col[2] = rgb;
    /* The specular color has no alpha. */
    col[3] = 0.0f;

    GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col));
    checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col)");
}

5875
/* State handler: load the 2x2 bump environment matrix of a texture stage
 * into the matching ARB_FFP_CONST_BUMPMAT() environment parameter. The
 * stage is derived from "state_id".
 *
 * WINED3D_SHADER_CONST_PS_BUMP_ENV is always added to the context's
 * constant update mask, even when the parameter itself is not loaded. */
static void set_bumpmat_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
{
    DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1);
    struct wined3d_context_gl *context_gl = wined3d_context_gl(context);
    const struct wined3d_gl_info *gl_info = context_gl->gl_info;
    struct wined3d_device *device = context->device;
    /* The texture stage states store the float bit patterns in integer
     * slots. Reinterpret them through a union instead of a pointer cast,
     * which would access the object through an incompatible lvalue type. */
    union
    {
        DWORD bits;
        float value;
    } m00, m01, m10, m11;
    float mat[2][2];

    context->constant_update_mask |= WINED3D_SHADER_CONST_PS_BUMP_ENV;

    if (device->shader_backend == &arb_program_shader_backend)
    {
        struct shader_arb_priv *priv = device->shader_priv;

        /* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants. */
        if (use_ps(state))
            return;

        priv->pshader_const_dirty[ARB_FFP_CONST_BUMPMAT(stage)] = 1;
        priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_BUMPMAT(stage) + 1);
    }

    m00.bits = state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT00];
    m01.bits = state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT01];
    m10.bits = state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT10];
    m11.bits = state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT11];

    mat[0][0] = m00.value;
    mat[0][1] = m01.value;
    mat[1][0] = m10.value;
    mat[1][1] = m11.value;

    /* The four matrix elements are passed as one 4-component parameter. */
    GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0]));
    checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0])");
}

5906 5907
/* State handler: load the bump environment luminance scale and offset of a
 * texture stage into the matching ARB_FFP_CONST_LUMINANCE() environment
 * parameter as (scale, offset, 0, 0). The stage is derived from "state_id".
 *
 * WINED3D_SHADER_CONST_PS_BUMP_ENV is always added to the context's
 * constant update mask, even when the parameter itself is not loaded. */
static void tex_bumpenvlum_arbfp(struct wined3d_context *context,
        const struct wined3d_state *state, DWORD state_id)
{
    DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1);
    struct wined3d_context_gl *context_gl = wined3d_context_gl(context);
    const struct wined3d_gl_info *gl_info = context_gl->gl_info;
    struct wined3d_device *device = context->device;
    /* The texture stage states store the float bit patterns in integer
     * slots. Reinterpret them through a union instead of a pointer cast,
     * which would access the object through an incompatible lvalue type. */
    union
    {
        DWORD bits;
        float value;
    } lscale, loffset;
    float param[4];

    context->constant_update_mask |= WINED3D_SHADER_CONST_PS_BUMP_ENV;

    if (device->shader_backend == &arb_program_shader_backend)
    {
        struct shader_arb_priv *priv = device->shader_priv;

        /* Exit now, don't set the luminance below, otherwise we may overwrite pixel shader constants. */
        if (use_ps(state))
            return;

        priv->pshader_const_dirty[ARB_FFP_CONST_LUMINANCE(stage)] = 1;
        priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_LUMINANCE(stage) + 1);
    }

    lscale.bits = state->texture_states[stage][WINED3D_TSS_BUMPENV_LSCALE];
    loffset.bits = state->texture_states[stage][WINED3D_TSS_BUMPENV_LOFFSET];

    param[0] = lscale.value;
    param[1] = loffset.value;
    param[2] = 0.0f;
    param[3] = 0.0f;

    GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param));
    checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)");
}

5938 5939
/* State handler: enable or disable GL_ALPHA_TEST according to
 * WINED3D_RS_ALPHATESTENABLE. When it is enabled, the alpha function and
 * reference value are set as well, provided the D3D compare function maps
 * to a non-zero GL value. */
static void alpha_test_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
{
    const struct wined3d_gl_info *gl_info = wined3d_context_gl(context)->gl_info;
    float alpha_ref;
    int gl_func;

    TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id);

    if (!state->render_states[WINED3D_RS_ALPHATESTENABLE])
    {
        gl_info->gl_ops.gl.p_glDisable(GL_ALPHA_TEST);
        checkGLcall("glDisable GL_ALPHA_TEST");
        return;
    }

    gl_info->gl_ops.gl.p_glEnable(GL_ALPHA_TEST);
    checkGLcall("glEnable GL_ALPHA_TEST");

    alpha_ref = wined3d_alpha_ref(state);
    gl_func = wined3d_gl_compare_func(state->render_states[WINED3D_RS_ALPHAFUNC]);
    if (!gl_func)
        return;

    gl_info->gl_ops.gl.p_glAlphaFunc(gl_func, alpha_ref);
    checkGLcall("glAlphaFunc");
}

/* State handler: load the source blit colour key range of the texture bound
 * to stage 0 into the environment parameters ARB_FFP_CONST_COLOR_KEY_LOW
 * and ARB_FFP_CONST_COLOR_KEY_HIGH. Does nothing when no texture is bound
 * to stage 0. */
static void color_key_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
{
    const struct wined3d_gl_info *gl_info = wined3d_context_gl(context)->gl_info;
    const struct wined3d_texture *texture = state->textures[0];
    struct wined3d_device *device = context->device;
    struct wined3d_color float_key[2];

    if (!texture)
        return;

    if (device->shader_backend == &arb_program_shader_backend)
    {
        struct shader_arb_priv *arb_priv = device->shader_priv;

        /* With an arbfp pixel shader in use the parameters hold application
         * provided constants; loading ours would overwrite them. */
        if (use_ps(state))
            return;

        arb_priv->pshader_const_dirty[ARB_FFP_CONST_COLOR_KEY_LOW] = 1;
        arb_priv->pshader_const_dirty[ARB_FFP_CONST_COLOR_KEY_HIGH] = 1;
        arb_priv->highest_dirty_ps_const = max(arb_priv->highest_dirty_ps_const,
                ARB_FFP_CONST_COLOR_KEY_HIGH + 1);
    }

    /* float_key[0] is loaded as the low bound, float_key[1] as the high bound. */
    wined3d_format_get_float_color_key(texture->resource.format, &texture->async.src_blt_color_key, float_key);

    GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_LOW, &float_key[0].r));
    checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_LOW, &float_key[0].r)");

    GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_HIGH, &float_key[1].r));
    checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_COLOR_KEY_HIGH, &float_key[1].r)");
}

6003
/* Translate a texture stage argument (WINED3DTA_*) into the name of the
 * fragment program register holding its value.
 *
 * If the argument carries the WINED3DTA_COMPLEMENT and/or
 * WINED3DTA_ALPHAREPLICATE modifier, the instructions applying the modifier
 * are appended to "buffer"; they write to the scratch register "arg<argnum>"
 * and, for argnum 0..2, that register's name is returned instead.
 *
 * Returns "unused" for ARG_UNUSED and "unknown" for an unrecognised
 * argument source; no modifier code is emitted in either case. */
static const char *get_argreg(struct wined3d_string_buffer *buffer, DWORD argnum, unsigned int stage, DWORD arg)
{
    static const char * const texture_regs[] =
    {
        "tex0", "tex1", "tex2", "tex3", "tex4", "tex5", "tex6", "tex7",
    };
    static const char * const constant_regs[] =
    {
        "const0", "const1", "const2", "const3", "const4", "const5", "const6", "const7",
    };
    static const char * const scratch_regs[] =
    {
        "arg0", "arg1", "arg2",
    };
    const char *reg;

    /* This is the marker for unused registers. */
    if (arg == ARG_UNUSED)
        return "unused";

    switch (arg & WINED3DTA_SELECTMASK)
    {
        case WINED3DTA_DIFFUSE:
            reg = "fragment.color.primary";
            break;

        case WINED3DTA_CURRENT:
            reg = "ret";
            break;

        case WINED3DTA_TEXTURE:
            if (stage < sizeof(texture_regs) / sizeof(*texture_regs))
                reg = texture_regs[stage];
            else
                reg = "unknown texture";
            break;

        case WINED3DTA_TFACTOR:
            reg = "tfactor";
            break;

        case WINED3DTA_SPECULAR:
            reg = "fragment.color.secondary";
            break;

        case WINED3DTA_TEMP:
            reg = "tempreg";
            break;

        case WINED3DTA_CONSTANT:
            if (stage < sizeof(constant_regs) / sizeof(*constant_regs))
                reg = constant_regs[stage];
            else
                reg = "unknown constant";
            break;

        default:
            return "unknown";
    }

    if (arg & WINED3DTA_COMPLEMENT)
    {
        shader_addline(buffer, "SUB arg%u, const.x, %s;\n", argnum, reg);
        /* Only arg0..arg2 have a name to hand back; otherwise the source
         * register name is kept. */
        if (argnum < sizeof(scratch_regs) / sizeof(*scratch_regs))
            reg = scratch_regs[argnum];
    }

    if (arg & WINED3DTA_ALPHAREPLICATE)
    {
        shader_addline(buffer, "MOV arg%u, %s.w;\n", argnum, reg);
        if (argnum < sizeof(scratch_regs) / sizeof(*scratch_regs))
            reg = scratch_regs[argnum];
    }

    return reg;
}

6073
/* Append the fragment program instructions implementing one texture stage
 * operation to "buffer".
 *
 * color / alpha select which components of the destination are written:
 * both -> the full register, color only -> .xyz, otherwise -> .w.
 * tmp_dst selects "tempreg" instead of "ret" as the destination.
 * op is the WINED3D_TOP_* operation and dw_arg0..2 are its WINED3DTA_*
 * arguments, resolved to register names with get_argreg().
 *
 * Operations with a 2x / 4x result scale get a trailing MUL_SAT by const.y
 * or const.z respectively (presumably 2.0 and 4.0; the constant is declared
 * by the caller's program text). */
static void gen_ffp_instr(struct wined3d_string_buffer *buffer, unsigned int stage, BOOL color,
        BOOL alpha, BOOL tmp_dst, DWORD op, DWORD dw_arg0, DWORD dw_arg1, DWORD dw_arg2)
{
    const char *mask, *dst, *arg0, *arg1, *arg2;
    unsigned int scale = 1;
    DWORD blend_src;

    if (!color)
        mask = ".w";
    else if (alpha)
        mask = "";
    else
        mask = ".xyz";

    if (tmp_dst)
        dst = "tempreg";
    else
        dst = "ret";

    /* Resolving the arguments may already emit modifier instructions. */
    arg0 = get_argreg(buffer, 0, stage, dw_arg0);
    arg1 = get_argreg(buffer, 1, stage, dw_arg1);
    arg2 = get_argreg(buffer, 2, stage, dw_arg2);

    switch (op)
    {
        case WINED3D_TOP_DISABLE:
            break;

        case WINED3D_TOP_SELECT_ARG1:
        case WINED3D_TOP_SELECT_ARG2:
            shader_addline(buffer, "MOV %s%s, %s;\n", dst, mask,
                    op == WINED3D_TOP_SELECT_ARG2 ? arg2 : arg1);
            break;

        case WINED3D_TOP_MODULATE:
        case WINED3D_TOP_MODULATE_2X:
        case WINED3D_TOP_MODULATE_4X:
            if (op == WINED3D_TOP_MODULATE_4X)
                scale = 4;
            else if (op == WINED3D_TOP_MODULATE_2X)
                scale = 2;
            shader_addline(buffer, "MUL %s%s, %s, %s;\n", dst, mask, arg1, arg2);
            break;

        case WINED3D_TOP_ADD:
        case WINED3D_TOP_ADD_SIGNED:
        case WINED3D_TOP_ADD_SIGNED_2X:
            if (op == WINED3D_TOP_ADD_SIGNED_2X)
                scale = 2;
            if (op != WINED3D_TOP_ADD)
            {
                /* The signed variants subtract const.w from the second
                 * argument first. */
                shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2);
                arg2 = "arg2";
            }
            shader_addline(buffer, "ADD_SAT %s%s, %s, %s;\n", dst, mask, arg1, arg2);
            break;

        case WINED3D_TOP_SUBTRACT:
            shader_addline(buffer, "SUB_SAT %s%s, %s, %s;\n", dst, mask, arg1, arg2);
            break;

        case WINED3D_TOP_ADD_SMOOTH:
            shader_addline(buffer, "SUB arg1, const.x, %s;\n", arg1);
            shader_addline(buffer, "MAD_SAT %s%s, arg1, %s, %s;\n", dst, mask, arg2, arg1);
            break;

        case WINED3D_TOP_BLEND_CURRENT_ALPHA:
        case WINED3D_TOP_BLEND_FACTOR_ALPHA:
        case WINED3D_TOP_BLEND_TEXTURE_ALPHA:
        case WINED3D_TOP_BLEND_DIFFUSE_ALPHA:
            /* The four blends only differ in where the blend factor's alpha
             * is taken from. */
            if (op == WINED3D_TOP_BLEND_CURRENT_ALPHA)
                blend_src = WINED3DTA_CURRENT;
            else if (op == WINED3D_TOP_BLEND_FACTOR_ALPHA)
                blend_src = WINED3DTA_TFACTOR;
            else if (op == WINED3D_TOP_BLEND_TEXTURE_ALPHA)
                blend_src = WINED3DTA_TEXTURE;
            else
                blend_src = WINED3DTA_DIFFUSE;
            arg0 = get_argreg(buffer, 0, stage, blend_src);
            shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dst, mask, arg0, arg1, arg2);
            break;

        case WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM:
            arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE);
            shader_addline(buffer, "SUB arg0.w, const.x, %s.w;\n", arg0);
            shader_addline(buffer, "MAD_SAT %s%s, %s, arg0.w, %s;\n", dst, mask, arg2, arg1);
            break;

        /* There is no case for premodulate; it ends up in the default
         * branch below. */

        case WINED3D_TOP_MODULATE_INVALPHA_ADD_COLOR:
            shader_addline(buffer, "SUB arg0.w, const.x, %s;\n", arg1);
            shader_addline(buffer, "MAD_SAT %s%s, arg0.w, %s, %s;\n", dst, mask, arg2, arg1);
            break;

        case WINED3D_TOP_MODULATE_ALPHA_ADD_COLOR:
            shader_addline(buffer, "MAD_SAT %s%s, %s.w, %s, %s;\n", dst, mask, arg1, arg2, arg1);
            break;

        case WINED3D_TOP_MODULATE_INVCOLOR_ADD_ALPHA:
            shader_addline(buffer, "SUB arg0, const.x, %s;\n", arg1);
            shader_addline(buffer, "MAD_SAT %s%s, arg0, %s, %s.w;\n", dst, mask, arg2, arg1);
            break;

        case WINED3D_TOP_MODULATE_COLOR_ADD_ALPHA:
            shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s.w;\n", dst, mask, arg1, arg2, arg1);
            break;

        case WINED3D_TOP_DOTPRODUCT3:
            scale = 4;
            shader_addline(buffer, "SUB arg1, %s, const.w;\n", arg1);
            shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2);
            shader_addline(buffer, "DP3_SAT %s%s, arg1, arg2;\n", dst, mask);
            break;

        case WINED3D_TOP_MULTIPLY_ADD:
            shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s;\n", dst, mask, arg1, arg2, arg0);
            break;

        case WINED3D_TOP_LERP:
            /* The msdn is not quite right here */
            shader_addline(buffer, "LRP %s%s, %s, %s, %s;\n", dst, mask, arg0, arg1, arg2);
            break;

        case WINED3D_TOP_BUMPENVMAP:
        case WINED3D_TOP_BUMPENVMAP_LUMINANCE:
            /* Those are handled in the first pass of the shader(generation pass 1 and 2) already */
            break;

        default:
            FIXME("Unhandled texture op %08x\n", op);
    }

    /* Apply the result scale, if any. */
    switch (scale)
    {
        case 2:
            shader_addline(buffer, "MUL_SAT %s%s, %s, const.y;\n", dst, mask, dst);
            break;

        case 4:
            shader_addline(buffer, "MUL_SAT %s%s, %s, const.z;\n", dst, mask, dst);
            break;

        default:
            break;
    }
}

6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223
/* Map a GL resource type to the texture target keyword used in ARB fragment
 * program text. Types without a mapping yield "unexpected_resource_type". */
static const char *arbfp_texture_target(enum wined3d_gl_resource_type type)
{
    const char *target = "unexpected_resource_type";

    if (type == WINED3D_GL_RES_TYPE_TEX_1D)
        target = "1D";
    else if (type == WINED3D_GL_RES_TYPE_TEX_2D)
        target = "2D";
    else if (type == WINED3D_GL_RES_TYPE_TEX_3D)
        target = "3D";
    else if (type == WINED3D_GL_RES_TYPE_TEX_CUBE)
        target = "CUBE";
    else if (type == WINED3D_GL_RES_TYPE_TEX_RECT)
        target = "RECT";

    return target;
}

6224
static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, const struct wined3d_gl_info *gl_info)
6225
{
6226 6227 6228
    BYTE tex_read = 0, bump_used = 0, luminance_used = 0, constant_used = 0;
    BOOL tempreg_used = FALSE, tfactor_used = FALSE;
    unsigned int stage, lowest_disabled_stage;
6229
    struct wined3d_string_buffer buffer;
6230 6231 6232
    struct color_fixup_masks masks;
    BOOL custom_linear_fog = FALSE;
    const char *textype, *instr;
6233
    DWORD arg0, arg1, arg2;
6234
    char colorcor_dst[8];
6235
    BOOL op_equal;
6236
    GLuint ret;
6237

6238
    if (!string_buffer_init(&buffer))
6239 6240 6241 6242 6243 6244 6245 6246 6247
    {
        ERR("Failed to initialize shader buffer.\n");
        return 0;
    }

    shader_addline(&buffer, "!!ARBfp1.0\n");

    if (settings->color_key_enabled)
    {
6248 6249
        shader_addline(&buffer, "PARAM color_key_low = program.env[%u];\n", ARB_FFP_CONST_COLOR_KEY_LOW);
        shader_addline(&buffer, "PARAM color_key_high = program.env[%u];\n", ARB_FFP_CONST_COLOR_KEY_HIGH);
6250
        tex_read |= 1;
6251 6252
    }

6253
    /* Find out which textures are read */
6254
    for (stage = 0; stage < WINED3D_MAX_TEXTURES; ++stage)
6255 6256 6257
    {
        if (settings->op[stage].cop == WINED3D_TOP_DISABLE)
            break;
6258

6259 6260 6261 6262
        arg0 = settings->op[stage].carg0 & WINED3DTA_SELECTMASK;
        arg1 = settings->op[stage].carg1 & WINED3DTA_SELECTMASK;
        arg2 = settings->op[stage].carg2 & WINED3DTA_SELECTMASK;

6263 6264
        if (arg0 == WINED3DTA_TEXTURE || arg1 == WINED3DTA_TEXTURE || arg2 == WINED3DTA_TEXTURE)
            tex_read |= 1u << stage;
6265
        if (settings->op[stage].tmp_dst)
6266
            tempreg_used = TRUE;
6267
        if (arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP)
6268
            tempreg_used = TRUE;
6269 6270 6271
        if (arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR)
            tfactor_used = TRUE;
        if (arg0 == WINED3DTA_CONSTANT || arg1 == WINED3DTA_CONSTANT || arg2 == WINED3DTA_CONSTANT)
6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293
            constant_used |= 1u << stage;

        switch (settings->op[stage].cop)
        {
            case WINED3D_TOP_BUMPENVMAP_LUMINANCE:
                luminance_used |= 1u << stage;
                /* fall through */
            case WINED3D_TOP_BUMPENVMAP:
                bump_used |= 1u << stage;
                /* fall through */
            case WINED3D_TOP_BLEND_TEXTURE_ALPHA:
            case WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM:
                tex_read |= 1u << stage;
                break;

            case WINED3D_TOP_BLEND_FACTOR_ALPHA:
                tfactor_used = TRUE;
                break;

            default:
                break;
        }
6294

6295 6296
        if (settings->op[stage].aop == WINED3D_TOP_DISABLE)
            continue;
6297

6298 6299 6300 6301
        arg0 = settings->op[stage].aarg0 & WINED3DTA_SELECTMASK;
        arg1 = settings->op[stage].aarg1 & WINED3DTA_SELECTMASK;
        arg2 = settings->op[stage].aarg2 & WINED3DTA_SELECTMASK;

6302 6303
        if (arg0 == WINED3DTA_TEXTURE || arg1 == WINED3DTA_TEXTURE || arg2 == WINED3DTA_TEXTURE)
            tex_read |= 1u << stage;
6304
        if (arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP)
6305
            tempreg_used = TRUE;
6306
        if (arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR)
6307
            tfactor_used = TRUE;
6308
        if (arg0 == WINED3DTA_CONSTANT || arg1 == WINED3DTA_CONSTANT || arg2 == WINED3DTA_CONSTANT)
6309
            constant_used |= 1u << stage;
6310
    }
6311
    lowest_disabled_stage = stage;
6312

6313 6314 6315
    switch (settings->fog)
    {
        case WINED3D_FFP_PS_FOG_OFF:                                                         break;
6316 6317 6318 6319 6320 6321 6322 6323 6324
        case WINED3D_FFP_PS_FOG_LINEAR:
            if (gl_info->quirks & WINED3D_QUIRK_BROKEN_ARB_FOG)
            {
                custom_linear_fog = TRUE;
                break;
            }
            shader_addline(&buffer, "OPTION ARB_fog_linear;\n");
            break;

6325 6326
        case WINED3D_FFP_PS_FOG_EXP:    shader_addline(&buffer, "OPTION ARB_fog_exp;\n");    break;
        case WINED3D_FFP_PS_FOG_EXP2:   shader_addline(&buffer, "OPTION ARB_fog_exp2;\n");   break;
6327 6328 6329 6330
        default: FIXME("Unexpected fog setting %d\n", settings->fog);
    }

    shader_addline(&buffer, "PARAM const = {1, 2, 4, 0.5};\n");
6331
    shader_addline(&buffer, "TEMP TMP;\n");
6332
    shader_addline(&buffer, "TEMP ret;\n");
6333
    if (tempreg_used || settings->sRGB_write) shader_addline(&buffer, "TEMP tempreg;\n");
6334 6335 6336
    shader_addline(&buffer, "TEMP arg0;\n");
    shader_addline(&buffer, "TEMP arg1;\n");
    shader_addline(&buffer, "TEMP arg2;\n");
6337
    for (stage = 0; stage < WINED3D_MAX_TEXTURES; ++stage)
6338
    {
6339
        if (constant_used & (1u << stage))
6340 6341
            shader_addline(&buffer, "PARAM const%u = program.env[%u];\n", stage, ARB_FFP_CONST_CONSTANT(stage));

6342
        if (!(tex_read & (1u << stage)))
6343 6344
            continue;

6345
        shader_addline(&buffer, "TEMP tex%u;\n", stage);
6346

6347
        if (!(bump_used & (1u << stage)))
6348
            continue;
6349
        shader_addline(&buffer, "PARAM bumpmat%u = program.env[%u];\n", stage, ARB_FFP_CONST_BUMPMAT(stage));
6350

6351
        if (!(luminance_used & (1u << stage)))
6352
            continue;
6353
        shader_addline(&buffer, "PARAM luminance%u = program.env[%u];\n", stage, ARB_FFP_CONST_LUMINANCE(stage));
6354
    }
6355
    if (tfactor_used)
6356
        shader_addline(&buffer, "PARAM tfactor = program.env[%u];\n", ARB_FFP_CONST_TFACTOR);
6357
    shader_addline(&buffer, "PARAM specular_enable = program.env[%u];\n", ARB_FFP_CONST_SPECULAR_ENABLE);
6358

6359 6360 6361
    if (settings->sRGB_write)
    {
        shader_addline(&buffer, "PARAM srgb_consts0 = ");
6362
        shader_arb_append_imm_vec4(&buffer, &wined3d_srgb_const[0].x);
6363 6364
        shader_addline(&buffer, ";\n");
        shader_addline(&buffer, "PARAM srgb_consts1 = ");
6365
        shader_arb_append_imm_vec4(&buffer, &wined3d_srgb_const[1].x);
6366
        shader_addline(&buffer, ";\n");
6367 6368
    }

6369
    if (lowest_disabled_stage < 7 && settings->emul_clipplanes)
6370
        shader_addline(&buffer, "KIL fragment.texcoord[7];\n");
6371

6372 6373 6374
    if (tempreg_used || settings->sRGB_write)
        shader_addline(&buffer, "MOV tempreg, 0.0;\n");

6375
    /* Generate texture sampling instructions */
6376
    for (stage = 0; stage < WINED3D_MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage)
6377
    {
6378
        if (!(tex_read & (1u << stage)))
6379
            continue;
6380

6381
        textype = arbfp_texture_target(settings->op[stage].tex_type);
6382

6383 6384
        if (settings->op[stage].projected == WINED3D_PROJECTION_NONE)
        {
6385
            instr = "TEX";
6386 6387 6388 6389
        }
        else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT4
                || settings->op[stage].projected == WINED3D_PROJECTION_COUNT3)
        {
6390
            instr = "TXP";
6391 6392 6393
        }
        else
        {
6394
            FIXME("Unexpected projection mode %d\n", settings->op[stage].projected);
6395 6396 6397
            instr = "TXP";
        }

6398 6399 6400 6401
        if (stage > 0
                && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP
                || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE))
        {
6402
            shader_addline(&buffer, "SWZ arg1, bumpmat%u, x, z, 0, 0;\n", stage - 1);
6403
            shader_addline(&buffer, "DP3 ret.x, arg1, tex%u;\n", stage - 1);
6404
            shader_addline(&buffer, "SWZ arg1, bumpmat%u, y, w, 0, 0;\n", stage - 1);
6405
            shader_addline(&buffer, "DP3 ret.y, arg1, tex%u;\n", stage - 1);
6406

6407 6408 6409 6410 6411 6412 6413 6414
            /* With projective textures, texbem only divides the static
             * texture coordinate, not the displacement, so multiply the
             * displacement with the dividing parameter before passing it to
             * TXP. */
            if (settings->op[stage].projected != WINED3D_PROJECTION_NONE)
            {
                if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT4)
                {
6415
                    shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].w;\n", stage);
6416 6417 6418 6419 6420
                    shader_addline(&buffer, "MUL ret.xyz, ret, fragment.texcoord[%u].w, fragment.texcoord[%u];\n",
                            stage, stage);
                }
                else
                {
6421
                    shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].z;\n", stage);
6422 6423
                    shader_addline(&buffer, "MAD ret.xyz, ret, fragment.texcoord[%u].z, fragment.texcoord[%u];\n",
                            stage, stage);
6424
                }
6425 6426 6427
            }
            else
            {
6428 6429 6430
                shader_addline(&buffer, "ADD ret, ret, fragment.texcoord[%u];\n", stage);
            }

6431 6432
            shader_addline(&buffer, "%s tex%u, ret, texture[%u], %s;\n",
                    instr, stage, stage, textype);
6433 6434
            if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)
            {
6435
                shader_addline(&buffer, "MAD_SAT ret.x, tex%u.z, luminance%u.x, luminance%u.y;\n",
6436
                               stage - 1, stage - 1, stage - 1);
6437
                shader_addline(&buffer, "MUL tex%u, tex%u, ret.x;\n", stage, stage);
6438
            }
6439 6440 6441
        }
        else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT3)
        {
6442
            shader_addline(&buffer, "MOV ret, fragment.texcoord[%u];\n", stage);
6443
            shader_addline(&buffer, "MOV ret.w, ret.z;\n");
6444 6445
            shader_addline(&buffer, "%s tex%u, ret, texture[%u], %s;\n",
                            instr, stage, stage, textype);
6446 6447 6448
        }
        else
        {
6449 6450
            shader_addline(&buffer, "%s tex%u, fragment.texcoord[%u], texture[%u], %s;\n",
                            instr, stage, stage, stage, textype);
6451 6452 6453
        }

        sprintf(colorcor_dst, "tex%u", stage);
6454 6455 6456
        masks = calc_color_correction(settings->op[stage].color_fixup, WINED3DSP_WRITEMASK_ALL);
        gen_color_correction(&buffer, colorcor_dst, colorcor_dst, "const.x", "const.y",
                settings->op[stage].color_fixup, masks);
6457 6458
    }

6459 6460
    if (settings->color_key_enabled)
    {
6461 6462 6463 6464 6465 6466
        shader_addline(&buffer, "SLT TMP, tex0, color_key_low;\n"); /* below low key */
        shader_addline(&buffer, "SGE ret, tex0, color_key_high;\n"); /* above high key */
        shader_addline(&buffer, "ADD TMP, TMP, ret;\n"); /* or */
        shader_addline(&buffer, "DP4 TMP.b, TMP, TMP;\n"); /* on any channel */
        shader_addline(&buffer, "SGE TMP, -TMP.b, 0.0;\n"); /* logical not */
        shader_addline(&buffer, "KIL -TMP;\n"); /* discard if true */
6467 6468
    }

6469 6470
    shader_addline(&buffer, "MOV ret, fragment.color.primary;\n");

6471
    /* Generate the main shader */
6472
    for (stage = 0; stage < WINED3D_MAX_TEXTURES; ++stage)
6473
    {
6474
        if (settings->op[stage].cop == WINED3D_TOP_DISABLE)
6475 6476
            break;

6477 6478
        if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1
                && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1)
6479
            op_equal = settings->op[stage].carg1 == settings->op[stage].aarg1;
6480 6481
        else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1
                && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2)
6482
            op_equal = settings->op[stage].carg1 == settings->op[stage].aarg2;
6483 6484
        else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2
                && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1)
6485
            op_equal = settings->op[stage].carg2 == settings->op[stage].aarg1;
6486 6487
        else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2
                && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2)
6488
            op_equal = settings->op[stage].carg2 == settings->op[stage].aarg2;
6489 6490 6491 6492 6493
        else
            op_equal = settings->op[stage].aop   == settings->op[stage].cop
                    && settings->op[stage].carg0 == settings->op[stage].aarg0
                    && settings->op[stage].carg1 == settings->op[stage].aarg1
                    && settings->op[stage].carg2 == settings->op[stage].aarg2;
6494

6495 6496
        if (settings->op[stage].aop == WINED3D_TOP_DISABLE)
        {
6497
            gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].tmp_dst,
6498 6499
                          settings->op[stage].cop, settings->op[stage].carg0,
                          settings->op[stage].carg1, settings->op[stage].carg2);
6500 6501 6502
        }
        else if (op_equal)
        {
6503
            gen_ffp_instr(&buffer, stage, TRUE, TRUE, settings->op[stage].tmp_dst,
6504 6505
                          settings->op[stage].cop, settings->op[stage].carg0,
                          settings->op[stage].carg1, settings->op[stage].carg2);
6506 6507 6508 6509
        }
        else if (settings->op[stage].cop != WINED3D_TOP_BUMPENVMAP
                && settings->op[stage].cop != WINED3D_TOP_BUMPENVMAP_LUMINANCE)
        {
6510
            gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].tmp_dst,
6511 6512
                          settings->op[stage].cop, settings->op[stage].carg0,
                          settings->op[stage].carg1, settings->op[stage].carg2);
6513
            gen_ffp_instr(&buffer, stage, FALSE, TRUE, settings->op[stage].tmp_dst,
6514 6515 6516 6517 6518
                          settings->op[stage].aop, settings->op[stage].aarg0,
                          settings->op[stage].aarg1, settings->op[stage].aarg2);
        }
    }

6519 6520
    if (settings->sRGB_write || custom_linear_fog)
    {
6521
        shader_addline(&buffer, "MAD ret, fragment.color.secondary, specular_enable, ret;\n");
6522 6523 6524 6525
        if (settings->sRGB_write)
            arbfp_add_sRGB_correction(&buffer, "ret", "arg0", "arg1", "arg2", "tempreg", FALSE);
        if (custom_linear_fog)
            arbfp_add_linear_fog(&buffer, "ret", "arg0");
6526
        shader_addline(&buffer, "MOV result.color, ret;\n");
6527 6528 6529
    }
    else
    {
6530
        shader_addline(&buffer, "MAD result.color, fragment.color.secondary, specular_enable, ret;\n");
6531
    }
6532 6533 6534 6535 6536 6537 6538

    /* Footer */
    shader_addline(&buffer, "END\n");

    /* Generate the shader */
    GL_EXTCALL(glGenProgramsARB(1, &ret));
    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ret));
6539
    shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer);
6540

6541
    string_buffer_free(&buffer);
6542 6543 6544
    return ret;
}

6545
static void fragment_prog_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
6546
{
6547 6548
    struct wined3d_context_gl *context_gl = wined3d_context_gl(context);
    const struct wined3d_gl_info *gl_info = context_gl->gl_info;
6549
    const struct wined3d_device *device = context->device;
6550
    struct shader_arb_priv *priv = device->fragment_priv;
6551
    BOOL use_pshader = use_ps(state);
6552
    struct ffp_frag_settings settings;
6553
    const struct arbfp_ffp_desc *desc;
6554 6555
    unsigned int i;

6556
    TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id);
6557

6558
    if (isStateDirty(context, STATE_RENDER(WINED3D_RS_FOGENABLE)))
6559 6560 6561 6562 6563
    {
        if (!use_pshader && device->shader_backend == &arb_program_shader_backend && context->last_was_pshader)
        {
            /* Reload fixed function constants since they collide with the
             * pixel shader constants. */
6564
            for (i = 0; i < WINED3D_MAX_TEXTURES; ++i)
6565
            {
6566
                set_bumpmat_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_BUMPENV_MAT00));
6567
                state_tss_constant_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_CONSTANT));
6568
            }
6569 6570
            state_texfactor_arbfp(context, state, STATE_RENDER(WINED3D_RS_TEXTUREFACTOR));
            state_arb_specularenable(context, state, STATE_RENDER(WINED3D_RS_SPECULARENABLE));
6571
            color_key_arbfp(context, state, STATE_COLOR_KEY);
6572
        }
6573
        else if (use_pshader)
6574
        {
6575
            context->shader_update_mask |= 1u << WINED3D_SHADER_TYPE_PIXEL;
6576 6577 6578
        }
        return;
    }
6579

6580 6581 6582 6583
    if (!use_pshader)
    {
        /* Find or create a shader implementing the fixed function pipeline
         * settings, then activate it. */
6584
        gen_ffp_frag_op(context, state, &settings, FALSE);
6585
        desc = (const struct arbfp_ffp_desc *)find_ffp_frag_shader(&priv->fragment_shaders, &settings);
6586 6587
        if (!desc)
        {
6588 6589 6590
            struct arbfp_ffp_desc *new_desc;

            if (!(new_desc = heap_alloc(sizeof(*new_desc))))
6591
            {
6592 6593 6594 6595
                ERR("Out of memory\n");
                return;
            }

6596
            new_desc->parent.settings = settings;
6597
            new_desc->shader = gen_arbfp_ffp_shader(&settings, gl_info);
6598
            add_ffp_frag_shader(&priv->fragment_shaders, &new_desc->parent);
6599 6600
            TRACE("Allocated fixed function replacement shader descriptor %p\n", new_desc);
            desc = new_desc;
6601 6602
        }

6603
        /* Now activate the replacement program. GL_FRAGMENT_PROGRAM_ARB is already active (however, note the
6604 6605 6606 6607 6608
         * comment above the shader_select call below). If e.g. GLSL is active, the shader_select call will
         * deactivate it.
         */
        GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader));
        checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)");
6609
        priv->current_fprogram_id = desc->shader;
6610

6611 6612 6613 6614
        if (device->shader_backend == &arb_program_shader_backend && context->last_was_pshader)
        {
            /* Reload fixed function constants since they collide with the
             * pixel shader constants. */
6615
            for (i = 0; i < WINED3D_MAX_TEXTURES; ++i)
6616
            {
6617
                set_bumpmat_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_BUMPENV_MAT00));
6618
                state_tss_constant_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_CONSTANT));
6619
            }
6620 6621
            state_texfactor_arbfp(context, state, STATE_RENDER(WINED3D_RS_TEXTUREFACTOR));
            state_arb_specularenable(context, state, STATE_RENDER(WINED3D_RS_SPECULARENABLE));
6622
            color_key_arbfp(context, state, STATE_COLOR_KEY);
6623
        }
6624
        context->last_was_pshader = FALSE;
6625 6626 6627 6628 6629
    }
    else if (!context->last_was_pshader)
    {
        if (device->shader_backend == &arb_program_shader_backend)
            context->constant_update_mask |= WINED3D_SHADER_CONST_PS_F;
6630
        context->last_was_pshader = TRUE;
6631 6632
    }

6633
    context->shader_update_mask |= 1u << WINED3D_SHADER_TYPE_PIXEL;
6634 6635
}

6636 6637 6638 6639 6640 6641 6642 6643
/* Forwarding handler for the fog states. The vertex pipeline links the fog
 * states to FOGENABLE, and the combined state table cannot express a
 * different linking per pipeline part, so the fragment pipeline cannot link
 * them to the fragment state directly and goes through this function
 * instead. A less visible consequence: changing fog start or fog end (both
 * linked to FOGENABLE on the vertex side) makes FOGENABLE dirty, which causes
 * fragment_prog_arbfp to be called, and that is what invokes this handler. */
static void state_arbfp_fog(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
{
    const DWORD fog_start = state->render_states[WINED3D_RS_FOGSTART];
    const DWORD fog_end = state->render_states[WINED3D_RS_FOGEND];
    enum fogsource source;

    TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id);

    /* Only forward when the pixel shader state is clean. */
    if (!isStateDirty(context, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL)))
        fragment_prog_arbfp(context, state, state_id);

    if (!state->render_states[WINED3D_RS_FOGENABLE])
        return;

    /* Pick the fog coordinate source: table fog always uses the fixed
     * function source; otherwise a vertex shader supplies it, and without
     * one the choice depends on the vertex fog mode and on whether
     * pre-transformed vertices were drawn last. */
    if (state->render_states[WINED3D_RS_FOGTABLEMODE] != WINED3D_FOG_NONE)
        source = FOGSOURCE_FFP;
    else if (use_vs(state))
        source = FOGSOURCE_VS;
    else if (state->render_states[WINED3D_RS_FOGVERTEXMODE] == WINED3D_FOG_NONE || context->last_was_rhw)
        source = FOGSOURCE_COORD;
    else
        source = FOGSOURCE_FFP;

    /* Nothing to refresh if the source is unchanged and the fog range is
     * not degenerate. */
    if (source == context->fog_source && fog_start != fog_end)
        return;

    context->fog_source = source;
    state_fogstartend(context, state, STATE_RENDER(WINED3D_RS_FOGSTART));
}

6683
/* Texture transform state handler: forwards to fragment_prog_arbfp unless
 * the pixel shader state is already dirty. */
static void textransform(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
{
    if (isStateDirty(context, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL)))
        return;

    fragment_prog_arbfp(context, state, state_id);
}

6689
static const struct wined3d_state_entry_template arbfp_fragmentstate_template[] =
6690 6691
{
    {STATE_RENDER(WINED3D_RS_TEXTUREFACTOR),              { STATE_RENDER(WINED3D_RS_TEXTUREFACTOR),             state_texfactor_arbfp   }, WINED3D_GL_EXT_NONE             },
6692 6693 6694 6695 6696 6697 6698 6699 6700
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6701 6702 6703 6704 6705 6706
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6707 6708 6709 6710 6711 6712 6713 6714 6715
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6716 6717 6718 6719 6720 6721
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6722 6723 6724 6725 6726 6727 6728 6729 6730
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6731 6732 6733 6734 6735 6736
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6737 6738 6739 6740 6741 6742 6743 6744 6745
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6746 6747 6748 6749 6750 6751
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6752 6753 6754 6755 6756 6757 6758 6759 6760
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6761 6762 6763 6764 6765 6766
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6767 6768 6769 6770 6771 6772 6773 6774 6775
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6776 6777 6778 6779 6780 6781
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6782 6783 6784 6785 6786 6787 6788 6789 6790
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6791 6792 6793 6794 6795 6796
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6797 6798 6799 6800 6801 6802 6803 6804 6805
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_OP),         { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG1),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG2),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG0),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_RESULT_ARG),       { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6806 6807 6808 6809 6810 6811
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
6812
    {STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),             { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            fragment_prog_arbfp     }, WINED3D_GL_EXT_NONE             },
6813 6814
    {STATE_RENDER(WINED3D_RS_ALPHAFUNC),                  { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE),           NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_RENDER(WINED3D_RS_ALPHAREF),                   { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE),           NULL                    }, WINED3D_GL_EXT_NONE             },
6815 6816 6817
    {STATE_RENDER(WINED3D_RS_ALPHATESTENABLE),            { STATE_RENDER(WINED3D_RS_ALPHATESTENABLE),           alpha_test_arbfp        }, WINED3D_GL_EXT_NONE             },
    {STATE_RENDER(WINED3D_RS_COLORKEYENABLE),             { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_COLOR_KEY,                                     { STATE_COLOR_KEY,                                    color_key_arbfp         }, WINED3D_GL_EXT_NONE             },
6818 6819 6820 6821 6822
    {STATE_RENDER(WINED3D_RS_FOGENABLE),                  { STATE_RENDER(WINED3D_RS_FOGENABLE),                 state_arbfp_fog         }, WINED3D_GL_EXT_NONE             },
    {STATE_RENDER(WINED3D_RS_FOGTABLEMODE),               { STATE_RENDER(WINED3D_RS_FOGENABLE),                 NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_RENDER(WINED3D_RS_FOGVERTEXMODE),              { STATE_RENDER(WINED3D_RS_FOGENABLE),                 NULL                    }, WINED3D_GL_EXT_NONE             },
    {STATE_RENDER(WINED3D_RS_FOGSTART),                   { STATE_RENDER(WINED3D_RS_FOGSTART),                  state_fogstartend       }, WINED3D_GL_EXT_NONE             },
    {STATE_RENDER(WINED3D_RS_FOGEND),                     { STATE_RENDER(WINED3D_RS_FOGSTART),                  NULL                    }, WINED3D_GL_EXT_NONE             },
6823
    {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE),            { STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE),           state_srgbwrite         }, ARB_FRAMEBUFFER_SRGB            },
6824
    {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE),            { STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL),            NULL                    }, WINED3D_GL_EXT_NONE             },
6825 6826
    {STATE_RENDER(WINED3D_RS_FOGCOLOR),                   { STATE_RENDER(WINED3D_RS_FOGCOLOR),                  state_fogcolor          }, WINED3D_GL_EXT_NONE             },
    {STATE_RENDER(WINED3D_RS_FOGDENSITY),                 { STATE_RENDER(WINED3D_RS_FOGDENSITY),                state_fogdensity        }, WINED3D_GL_EXT_NONE             },
6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842
    {STATE_TEXTURESTAGE(0,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(0,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(1,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(2,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(3,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(4,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(5,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(6,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(7,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(0, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(0, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(1, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(1, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(2, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(2, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(3, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(3, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(4, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(4, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(5, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(5, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(6, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(6, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
    {STATE_TEXTURESTAGE(7, WINED3D_TSS_CONSTANT),         { STATE_TEXTURESTAGE(7, WINED3D_TSS_CONSTANT),        state_tss_constant_arbfp}, WINED3D_GL_EXT_NONE             },
6843
    {STATE_RENDER(WINED3D_RS_SPECULARENABLE),             { STATE_RENDER(WINED3D_RS_SPECULARENABLE),            state_arb_specularenable}, WINED3D_GL_EXT_NONE             },
6844
    {STATE_RENDER(WINED3D_RS_SHADEMODE),                  { STATE_RENDER(WINED3D_RS_SHADEMODE),                 state_shademode         }, WINED3D_GL_EXT_NONE             },
6845
    {0 /* Terminate */,                                   { 0,                                                  0                       }, WINED3D_GL_EXT_NONE             },
6846 6847
};

6848 6849 6850 6851 6852 6853 6854 6855 6856
/* Per-context allocation hook of the ARB fragment pipeline. This backend
 * keeps no per-context data, so there is nothing to set up and the call
 * always reports success. */
static BOOL arbfp_alloc_context_data(struct wined3d_context *context)
{
    /* Nothing to allocate. */
    return TRUE;
}

/* Counterpart of arbfp_alloc_context_data(); nothing was allocated, so
 * there is nothing to release. */
static void arbfp_free_context_data(struct wined3d_context *context)
{
    /* Intentionally empty. */
}

6857 6858
const struct wined3d_fragment_pipe_ops arbfp_fragment_pipeline =
{
6859 6860
    arbfp_enable,
    arbfp_get_caps,
6861
    arbfp_get_emul_mask,
6862 6863
    arbfp_alloc,
    arbfp_free,
6864 6865
    arbfp_alloc_context_data,
    arbfp_free_context_data,
6866
    shader_arb_color_fixup_supported,
6867
    arbfp_fragmentstate_template,
6868
};
6869

6870 6871
struct arbfp_blit_type
{
6872 6873
    enum complex_fixup fixup : 4;
    enum wined3d_gl_resource_type res_type : 3;
6874 6875
    DWORD use_color_key : 1;
    DWORD padding : 24;
6876 6877 6878 6879
};

struct arbfp_blit_desc
{
6880
    GLuint shader;
6881 6882 6883 6884
    struct arbfp_blit_type type;
    struct wine_rb_entry entry;
};

6885
/* Parameter slots used by the blit shaders; presumably these are program
 * local parameter indices (confirm against the shader generators). */
#define ARBFP_BLIT_PARAM_SIZE 0
#define ARBFP_BLIT_PARAM_COLOR_KEY_LOW 1
#define ARBFP_BLIT_PARAM_COLOR_KEY_HIGH 2
6888

6889
struct wined3d_arbfp_blitter
6890
{
6891
    struct wined3d_blitter blitter;
6892
    struct wine_rb_tree shaders;
6893
    GLuint palette_texture;
6894 6895
};

6896 6897 6898 6899 6900
static int arbfp_blit_type_compare(const void *key, const struct wine_rb_entry *entry)
{
    const struct arbfp_blit_type *ka = key;
    const struct arbfp_blit_type *kb = &WINE_RB_ENTRY_VALUE(entry, const struct arbfp_blit_desc, entry)->type;

6901
    return memcmp(ka, kb, sizeof(*ka));
6902 6903 6904
}

/* Context activation is done by the caller. */
6905
static void arbfp_free_blit_shader(struct wine_rb_entry *entry, void *ctx)
6906 6907
{
    struct arbfp_blit_desc *entry_arb = WINE_RB_ENTRY_VALUE(entry, struct arbfp_blit_desc, entry);
6908
    const struct wined3d_gl_info *gl_info;
6909
    struct wined3d_context_gl *context_gl;
6910

6911
    context_gl = ctx;
6912
    gl_info = context_gl->gl_info;
6913 6914 6915

    GL_EXTCALL(glDeleteProgramsARB(1, &entry_arb->shader));
    checkGLcall("glDeleteProgramsARB(1, &entry_arb->shader)");
6916
    heap_free(entry_arb);
6917 6918
}

6919 6920
/* Context activation is done by the caller. */
static void arbfp_blitter_destroy(struct wined3d_blitter *blitter, struct wined3d_context *context)
6921
{
6922
    struct wined3d_context_gl *context_gl = wined3d_context_gl(context);
6923
    const struct wined3d_gl_info *gl_info = context_gl->gl_info;
6924 6925
    struct wined3d_arbfp_blitter *arbfp_blitter;
    struct wined3d_blitter *next;
6926

6927
    if ((next = blitter->next))
6928
        next->ops->blitter_destroy(next, &context_gl->c);
6929

6930
    arbfp_blitter = CONTAINING_RECORD(blitter, struct wined3d_arbfp_blitter, blitter);
6931

6932
    wine_rb_destroy(&arbfp_blitter->shaders, arbfp_free_blit_shader, context_gl);
6933
    checkGLcall("Delete blit programs");
6934

6935 6936
    if (arbfp_blitter->palette_texture)
        gl_info->gl_ops.gl.p_glDeleteTextures(1, &arbfp_blitter->palette_texture);
6937

6938
    heap_free(arbfp_blitter);
6939 6940
}

6941
static void gen_packed_yuv_read(struct wined3d_string_buffer *buffer,
6942
        const struct arbfp_blit_type *type, char *luminance)
6943
{
6944
    char chroma;
6945
    const char *tex, *texinstr = "TXP";
6946

6947 6948
    if (type->fixup == COMPLEX_FIXUP_UYVY)
    {
6949 6950
        chroma = 'x';
        *luminance = 'w';
6951 6952 6953
    }
    else
    {
6954 6955
        chroma = 'w';
        *luminance = 'x';
6956
    }
6957

6958 6959 6960
    tex = arbfp_texture_target(type->res_type);
    if (type->res_type == WINED3D_GL_RES_TYPE_TEX_RECT)
        texinstr = "TEX";
6961

6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973
    /* First we have to read the chroma values. This means we need at least two pixels(no filtering),
     * or 4 pixels(with filtering). To get the unmodified chromas, we have to rid ourselves of the
     * filtering when we sample the texture.
     *
     * These are the rules for reading the chroma:
     *
     * Even pixel: Cr
     * Even pixel: U
     * Odd pixel: V
     *
     * So we have to get the sampling x position in non-normalized coordinates in integers
     */
6974
    if (type->res_type != WINED3D_GL_RES_TYPE_TEX_RECT)
6975
    {
6976 6977
        shader_addline(buffer, "MUL texcrd.xy, fragment.texcoord[0], size.x;\n");
        shader_addline(buffer, "MOV texcrd.w, size.x;\n");
6978 6979 6980
    }
    else
    {
6981 6982 6983 6984 6985 6986 6987 6988 6989
        shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
    }
    /* We must not allow filtering between pixel x and x+1, this would mix U and V
     * Vertical filtering is ok. However, bear in mind that the pixel center is at
     * 0.5, so add 0.5.
     */
    shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");
    shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.y;\n");

6990 6991
    /* Multiply the x coordinate by 0.5 and get the fraction. This gives 0.25
     * and 0.75 for the even and odd pixels respectively. */
6992 6993 6994
    shader_addline(buffer, "MUL texcrd2, texcrd, coef.y;\n");
    shader_addline(buffer, "FRC texcrd2, texcrd2;\n");

6995
    /* Sample Pixel 1. */
6996 6997 6998 6999
    shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex);

    /* Put the value into either of the chroma values */
    shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n");
7000
    shader_addline(buffer, "MUL chroma.x, luminance.%c, temp.x;\n", chroma);
7001
    shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n");
7002
    shader_addline(buffer, "MUL chroma.y, luminance.%c, temp.x;\n", chroma);
7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013

    /* Sample pixel 2. If we read an even pixel(SLT above returned 1), sample
     * the pixel right to the current one. Otherwise, sample the left pixel.
     * Bias and scale the SLT result to -1;1 and add it to the texcrd.x.
     */
    shader_addline(buffer, "MAD temp.x, temp.x, coef.z, -coef.x;\n");
    shader_addline(buffer, "ADD texcrd.x, texcrd, temp.x;\n");
    shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex);

    /* Put the value into the other chroma */
    shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n");
7014
    shader_addline(buffer, "MAD chroma.y, luminance.%c, temp.x, chroma.y;\n", chroma);
7015
    shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n");
7016
    shader_addline(buffer, "MAD chroma.x, luminance.%c, temp.x, chroma.x;\n", chroma);
7017 7018 7019 7020 7021 7022 7023 7024 7025

    /* TODO: If filtering is enabled, sample a 2nd pair of pixels left or right of
     * the current one and lerp the two U and V values
     */

    /* This gives the correctly filtered luminance value */
    shader_addline(buffer, "TEX luminance, fragment.texcoord[0], texture[0], %s;\n", tex);
}

7026 7027
static void gen_yv12_read(struct wined3d_string_buffer *buffer,
        const struct arbfp_blit_type *type, char *luminance)
7028
{
7029
    const char *tex;
7030 7031
    static const float yv12_coef[]
            = {2.0f / 3.0f, 1.0f / 6.0f, (2.0f / 3.0f) + (1.0f / 6.0f), 1.0f / 3.0f};
7032

7033
    tex = arbfp_texture_target(type->res_type);
7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055

    /* YV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2)
     * V and a (W/2)x(H/2) U plane, each with 8 bit per pixel. So the effective
     * bitdepth is 12 bits per pixel. Since the U and V planes have only half the
     * pitch of the luminance plane, the packing into the gl texture is a bit
     * unfortunate. If the whole texture is interpreted as luminance data it looks
     * approximately like this:
     *
     *        +----------------------------------+----
     *        |                                  |
     *        |                                  |
     *        |                                  |
     *        |                                  |
     *        |                                  |   2
     *        |            LUMINANCE             |   -
     *        |                                  |   3
     *        |                                  |
     *        |                                  |
     *        |                                  |
     *        |                                  |
     *        +----------------+-----------------+----
     *        |                |                 |
7056
     *        |  V even rows   |  V odd rows     |
7057 7058 7059
     *        |                |                 |   1
     *        +----------------+------------------   -
     *        |                |                 |   3
7060
     *        |  U even rows   |  U odd rows     |
7061 7062 7063 7064 7065 7066
     *        |                |                 |
     *        +----------------+-----------------+----
     *        |                |                 |
     *        |     0.5        |       0.5       |
     *
     * So it appears as if there are 4 chroma images, but in fact the odd rows
7067
     * in the chroma images are in the same row as the even ones. So it is
7068 7069 7070 7071 7072
     * kinda tricky to read
     *
     * When reading from rectangle textures, keep in mind that the input y coordinates
     * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height
     */
7073 7074 7075
    shader_addline(buffer, "PARAM yv12_coef = ");
    shader_arb_append_imm_vec4(buffer, yv12_coef);
    shader_addline(buffer, ";\n");
7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086

    shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
    /* the chroma planes have only half the width */
    shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n");

    /* The first value is between 2/3 and 5/6th of the texture's height, so scale+bias
     * the coordinate. Also read the right side of the image when reading odd lines
     *
     * Don't forget to clamp the y values in into the range, otherwise we'll get filtering
     * bleeding
     */
7087
    if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D)
7088
    {
7089 7090 7091 7092 7093 7094 7095
        shader_addline(buffer, "RCP chroma.w, size.y;\n");

        shader_addline(buffer, "MUL texcrd2.y, texcrd.y, size.y;\n");

        shader_addline(buffer, "FLR texcrd2.y, texcrd2.y;\n");
        shader_addline(buffer, "MAD texcrd.y, texcrd.y, yv12_coef.y, yv12_coef.x;\n");

7096
        /* Read odd lines from the right side (add size * 0.5 to the x coordinate). */
7097 7098 7099 7100 7101 7102 7103 7104 7105 7106
        shader_addline(buffer, "ADD texcrd2.x, texcrd2.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */
        shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n");
        shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n");
        shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n");

        /* clamp, keep the half pixel origin in mind */
        shader_addline(buffer, "MAD temp.y, coef.y, chroma.w, yv12_coef.x;\n");
        shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
        shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.z;\n");
        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
7107 7108 7109
    }
    else
    {
7110
        /* The y coordinate for V is in the range [size, size + size / 4). */
7111 7112 7113
        shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n");
        shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.w, size.y;\n");

7114
        /* Read odd lines from the right side (add size * 0.5 to the x coordinate). */
7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128
        shader_addline(buffer, "ADD texcrd2.x, texcrd.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */
        shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n");
        shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n");
        shader_addline(buffer, "MUL texcrd2.x, texcrd2.x, size.x;\n");
        shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n");

        /* Make sure to read exactly from the pixel center */
        shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n");
        shader_addline(buffer, "ADD texcrd.y, texcrd.y, coef.y;\n");

        /* Clamp */
        shader_addline(buffer, "MAD temp.y, size.y, coef.w, size.y;\n");
        shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n");
        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
7129
        shader_addline(buffer, "ADD temp.y, size.y, coef.y;\n");
7130 7131 7132 7133
        shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
    }
    /* Read the texture, put the result into the output register */
    shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
7134
    shader_addline(buffer, "MOV chroma.x, temp.w;\n");
7135 7136 7137 7138

    /* The other chroma value is 1/6th of the texture lower, from 5/6th to 6/6th
     * No need to clamp because we're just reusing the already clamped value from above
     */
7139
    if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D)
7140
        shader_addline(buffer, "ADD texcrd.y, texcrd.y, yv12_coef.y;\n");
7141
    else
7142 7143
        shader_addline(buffer, "MAD texcrd.y, size.y, coef.w, texcrd.y;\n");
    shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
7144
    shader_addline(buffer, "MOV chroma.y, temp.w;\n");
7145 7146 7147 7148 7149 7150

    /* Sample the luminance value. It is in the top 2/3rd of the texture, so scale the y coordinate.
     * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance
     * values due to filtering
     */
    shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
7151
    if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D)
7152
    {
7153 7154 7155 7156 7157
        /* Multiply the y coordinate by 2/3 and clamp it */
        shader_addline(buffer, "MUL texcrd.y, texcrd.y, yv12_coef.x;\n");
        shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.x;\n");
        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
        shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
7158 7159 7160
    }
    else
    {
7161
        /* Reading from texture_rectangles is pretty straightforward, just use the unmodified
7162 7163 7164 7165 7166 7167
         * texture coordinate. It is still a good idea to clamp it though, since the opengl texture
         * is bigger
         */
        shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n");
        shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n");
        shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
7168
    }
7169 7170 7171
    *luminance = 'a';
}

7172 7173
static void gen_nv12_read(struct wined3d_string_buffer *buffer,
        const struct arbfp_blit_type *type, char *luminance)
7174 7175 7176 7177 7178
{
    const char *tex;
    static const float nv12_coef[]
            = {2.0f / 3.0f, 1.0f / 3.0f, 1.0f, 1.0f};

7179
    tex = arbfp_texture_target(type->res_type);
7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218

    /* NV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2)
     * sized plane where each component is an UV pair. So the effective
     * bitdepth is 12 bits per pixel If the whole texture is interpreted as luminance
     * data it looks approximately like this:
     *
     *        +----------------------------------+----
     *        |                                  |
     *        |                                  |
     *        |                                  |
     *        |                                  |
     *        |                                  |   2
     *        |            LUMINANCE             |   -
     *        |                                  |   3
     *        |                                  |
     *        |                                  |
     *        |                                  |
     *        |                                  |
     *        +----------------------------------+----
     *        |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV|
     *        |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV|
     *        |                                  |   1
     *        |                                  |   -
     *        |                                  |   3
     *        |                                  |
     *        |                                  |
     *        +----------------------------------+----
     *
     * When reading from rectangle textures, keep in mind that the input y coordinates
     * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height. */

    shader_addline(buffer, "PARAM nv12_coef = ");
    shader_arb_append_imm_vec4(buffer, nv12_coef);
    shader_addline(buffer, ";\n");

    shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
    /* We only have half the number of chroma pixels. */
    shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n");

7219
    if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D)
7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248
    {
        shader_addline(buffer, "RCP chroma.w, size.x;\n");
        shader_addline(buffer, "RCP chroma.z, size.y;\n");

        shader_addline(buffer, "MAD texcrd.y, texcrd.y, nv12_coef.y, nv12_coef.x;\n");

        /* We must not allow filtering horizontally, this would mix U and V.
         * Vertical filtering is ok. However, bear in mind that the pixel center is at
         * 0.5, so add 0.5. */

        /* Convert to non-normalized coordinates so we can find the
         * individual pixel. */
        shader_addline(buffer, "MUL texcrd.x, texcrd.x, size.x;\n");
        shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");
        /* Multiply by 2 since chroma components are stored in UV pixel pairs,
         * add 0.5 to hit the center of the pixel. */
        shader_addline(buffer, "MAD texcrd.x, texcrd.x, coef.z, coef.y;\n");

        /* Convert back to normalized coordinates. */
        shader_addline(buffer, "MUL texcrd.x, texcrd.x, chroma.w;\n");

        /* Clamp, keep the half pixel origin in mind. */
        shader_addline(buffer, "MAD temp.y, coef.y, chroma.z, nv12_coef.x;\n");
        shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
        shader_addline(buffer, "MAD temp.y, -coef.y, chroma.z, nv12_coef.z;\n");
        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
    }
    else
    {
7249
        /* The y coordinate for chroma is in the range [size, size + size / 2). */
7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267
        shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.y, size.y;\n");

        shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");
        /* Multiply by 2 since chroma components are stored in UV pixel pairs,
         * add 0.5 to hit the center of the pixel. */
        shader_addline(buffer, "MAD texcrd.x, texcrd.x, coef.z, coef.y;\n");

        /* Clamp */
        shader_addline(buffer, "MAD temp.y, size.y, coef.y, size.y;\n");
        shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n");
        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
        shader_addline(buffer, "ADD temp.y, size.y, coef.y;\n");
        shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
    }
    /* Read the texture, put the result into the output register. */
    shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
    shader_addline(buffer, "MOV chroma.y, temp.w;\n");

7268
    if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D)
7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284
    {
        /* Add 1/size.x */
        shader_addline(buffer, "ADD texcrd.x, texcrd.x, chroma.w;\n");
    }
    else
    {
        /* Add 1 */
        shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.x;\n");
    }
    shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
    shader_addline(buffer, "MOV chroma.x, temp.w;\n");

    /* Sample the luminance value. It is in the top 2/3rd of the texture, so scale the y coordinate.
     * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance
     * values due to filtering. */
    shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
7285
    if (type->res_type == WINED3D_GL_RES_TYPE_TEX_2D)
7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305
    {
        /* Multiply the y coordinate by 2/3 and clamp it */
        shader_addline(buffer, "MUL texcrd.y, texcrd.y, nv12_coef.x;\n");
        shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, nv12_coef.x;\n");
        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
        shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
    }
    else
    {
        /* Reading from texture_rectangles is pretty straightforward, just use the unmodified
         * texture coordinate. It is still a good idea to clamp it though, since the opengl texture
         * is bigger
         */
        shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n");
        shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n");
        shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
    }
    *luminance = 'a';
}

7306
/* Context activation is done by the caller. */
7307
static GLuint gen_p8_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type)
7308
{
7309
    GLuint shader;
7310
    struct wined3d_string_buffer buffer;
7311
    const char *tex_target = arbfp_texture_target(type->res_type);
7312

7313 7314 7315 7316 7317
    /* This should not happen because we only use this conversion for
     * present blits which don't use color keying. */
    if (type->use_color_key)
        FIXME("Implement P8 color keying.\n");

7318
    /* Shader header */
7319
    if (!string_buffer_init(&buffer))
7320 7321 7322 7323 7324 7325 7326
    {
        ERR("Failed to initialize shader buffer.\n");
        return 0;
    }

    GL_EXTCALL(glGenProgramsARB(1, &shader));
    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
7327 7328
    if (!shader)
    {
7329
        string_buffer_free(&buffer);
7330 7331 7332 7333 7334 7335 7336 7337 7338 7339
        return 0;
    }

    shader_addline(&buffer, "!!ARBfp1.0\n");
    shader_addline(&buffer, "TEMP index;\n");

    /* { 255/256, 0.5/255*255/256, 0, 0 } */
    shader_addline(&buffer, "PARAM constants = { 0.996, 0.00195, 0, 0 };\n");

    /* The alpha-component contains the palette index */
7340
    shader_addline(&buffer, "TEX index, fragment.texcoord[0], texture[0], %s;\n", tex_target);
7341 7342 7343 7344 7345 7346 7347 7348

    /* Scale the index by 255/256 and add a bias of '0.5' in order to sample in the middle */
    shader_addline(&buffer, "MAD index.a, index.a, constants.x, constants.y;\n");

    /* Use the alpha-component as an index in the palette to get the final color */
    shader_addline(&buffer, "TEX result.color, index.a, texture[1], 1D;\n");
    shader_addline(&buffer, "END\n");

7349
    shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer);
7350

7351
    string_buffer_free(&buffer);
7352 7353 7354 7355

    return shader;
}

7356
/* Context activation is done by the caller. */
7357 7358
static void arbfp_blitter_upload_palette(struct wined3d_arbfp_blitter *blitter,
        const struct wined3d_texture_gl *texture_gl, struct wined3d_context_gl *context_gl)
7359
{
7360
    const struct wined3d_palette *palette = texture_gl->t.swapchain ? texture_gl->t.swapchain->palette : NULL;
7361
    const struct wined3d_gl_info *gl_info = context_gl->gl_info;
7362

7363 7364
    if (!blitter->palette_texture)
        gl_info->gl_ops.gl.p_glGenTextures(1, &blitter->palette_texture);
7365

7366
    GL_EXTCALL(glActiveTexture(GL_TEXTURE1));
7367
    gl_info->gl_ops.gl.p_glBindTexture(GL_TEXTURE_1D, blitter->palette_texture);
7368

7369
    gl_info->gl_ops.gl.p_glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
7370

7371
    gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
7372
    /* Make sure we have discrete color levels. */
7373 7374
    gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    gl_info->gl_ops.gl.p_glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
7375
    /* TODO: avoid unneeded uploads in the future by adding some SFLAG_PALETTE_DIRTY mechanism */
7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387
    if (palette)
    {
        gl_info->gl_ops.gl.p_glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, 256, 0, GL_BGRA,
                GL_UNSIGNED_INT_8_8_8_8_REV, palette->colors);
    }
    else
    {
        static const DWORD black;
        FIXME("P8 surface loaded without a palette.\n");
        gl_info->gl_ops.gl.p_glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, 1, 0, GL_BGRA,
                GL_UNSIGNED_INT_8_8_8_8_REV, &black);
    }
7388 7389

    /* Switch back to unit 0 in which the 2D texture will be stored. */
7390
    wined3d_context_gl_active_texture(context_gl, gl_info, 0);
7391 7392
}

7393
/* Context activation is done by the caller. */
7394
static GLuint gen_yuv_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type)
7395
{
7396
    GLuint shader;
7397
    struct wined3d_string_buffer buffer;
7398 7399
    char luminance_component;

7400 7401 7402
    if (type->use_color_key)
        FIXME("Implement YUV color keying.\n");

7403
    /* Shader header */
7404
    if (!string_buffer_init(&buffer))
7405 7406 7407 7408
    {
        ERR("Failed to initialize shader buffer.\n");
        return 0;
    }
7409 7410 7411 7412 7413

    GL_EXTCALL(glGenProgramsARB(1, &shader));
    checkGLcall("GL_EXTCALL(glGenProgramsARB(1, &shader))");
    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
    checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");
7414 7415
    if (!shader)
    {
7416
        string_buffer_free(&buffer);
7417 7418
        return 0;
    }
7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429

    /* The YUY2 and UYVY formats contain two pixels packed into a 32 bit macropixel,
     * giving effectively 16 bit per pixel. The color consists of a luminance(Y) and
     * two chroma(U and V) values. Each macropixel has two luminance values, one for
     * each single pixel it contains, and one U and one V value shared between both
     * pixels.
     *
     * The data is loaded into an A8L8 texture. With YUY2, the luminance component
     * contains the luminance and alpha the chroma. With UYVY it is vice versa. Thus
     * take the format into account when generating the read swizzles
     *
7430
     * Reading the Y value is straightforward - just sample the texture. The hardware
7431 7432 7433 7434 7435 7436 7437 7438
     * takes care of filtering in the horizontal and vertical direction.
     *
     * Reading the U and V values is harder. We have to avoid filtering horizontally,
     * because that would mix the U and V values of one pixel or two adjacent pixels.
     * Thus floor the texture coordinate and add 0.5 to get an unfiltered read,
     * regardless of the filtering setting. Vertical filtering works automatically
     * though - the U and V values of two rows are mixed nicely.
     *
7439
     * Apart of avoiding filtering issues, the code has to know which value it just
7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460
     * read, and where it can find the other one. To determine this, it checks if
     * it sampled an even or odd pixel, and shifts the 2nd read accordingly.
     *
     * Handling horizontal filtering of U and V values requires reading a 2nd pair
     * of pixels, extracting U and V and mixing them. This is not implemented yet.
     *
     * An alternative implementation idea is to load the texture as A8R8G8B8 texture,
     * with width / 2. This way one read gives all 3 values, finding U and V is easy
     * in an unfiltered situation. Finding the luminance on the other hand requires
     * finding out if it is an odd or even pixel. The real drawback of this approach
     * is filtering. This would have to be emulated completely in the shader, reading
     * up two 2 packed pixels in up to 2 rows and interpolating both horizontally and
     * vertically. Beyond that it would require adjustments to the texture handling
     * code to deal with the width scaling
     */
    shader_addline(&buffer, "!!ARBfp1.0\n");
    shader_addline(&buffer, "TEMP luminance;\n");
    shader_addline(&buffer, "TEMP temp;\n");
    shader_addline(&buffer, "TEMP chroma;\n");
    shader_addline(&buffer, "TEMP texcrd;\n");
    shader_addline(&buffer, "TEMP texcrd2;\n");
7461
    shader_addline(&buffer, "PARAM coef = {1.0, 0.5, 2.0, 0.25};\n");
7462
    shader_addline(&buffer, "PARAM yuv_coef = {1.403, 0.344, 0.714, 1.770};\n");
7463
    shader_addline(&buffer, "PARAM size = program.local[%u];\n", ARBFP_BLIT_PARAM_SIZE);
7464

7465
    switch (type->fixup)
7466
    {
7467 7468
        case COMPLEX_FIXUP_UYVY:
        case COMPLEX_FIXUP_YUY2:
7469
            gen_packed_yuv_read(&buffer, type, &luminance_component);
7470 7471
            break;

7472
        case COMPLEX_FIXUP_YV12:
7473
            gen_yv12_read(&buffer, type, &luminance_component);
7474 7475
            break;

7476
        case COMPLEX_FIXUP_NV12:
7477
            gen_nv12_read(&buffer, type, &luminance_component);
7478 7479
            break;

7480
        default:
7481
            FIXME("Unsupported YUV fixup %#x\n", type->fixup);
7482
            string_buffer_free(&buffer);
7483
            return 0;
7484 7485 7486 7487 7488 7489
    }

    /* Calculate the final result. Formula is taken from
     * http://www.fourcc.org/fccyvrgb.php. Note that the chroma
     * ranges from -0.5 to 0.5
     */
7490
    shader_addline(&buffer, "SUB chroma.xy, chroma, coef.y;\n");
7491

7492 7493 7494 7495
    shader_addline(&buffer, "MAD result.color.x, chroma.x, yuv_coef.x, luminance.%c;\n", luminance_component);
    shader_addline(&buffer, "MAD temp.x, -chroma.y, yuv_coef.y, luminance.%c;\n", luminance_component);
    shader_addline(&buffer, "MAD result.color.y, -chroma.x, yuv_coef.z, temp.x;\n");
    shader_addline(&buffer, "MAD result.color.z, chroma.y, yuv_coef.w, luminance.%c;\n", luminance_component);
7496 7497
    shader_addline(&buffer, "END\n");

7498
    shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer);
7499

7500
    string_buffer_free(&buffer);
7501 7502 7503 7504

    return shader;
}

7505
/* Context activation is done by the caller. */
7506
static GLuint arbfp_gen_plain_shader(const struct wined3d_gl_info *gl_info, const struct arbfp_blit_type *type)
7507
{
7508
    GLuint shader;
7509
    struct wined3d_string_buffer buffer;
7510
    const char *tex_target = arbfp_texture_target(type->res_type);
7511 7512

    /* Shader header */
7513
    if (!string_buffer_init(&buffer))
7514 7515 7516 7517 7518 7519 7520 7521
    {
        ERR("Failed to initialize shader buffer.\n");
        return 0;
    }

    GL_EXTCALL(glGenProgramsARB(1, &shader));
    if (!shader)
    {
7522
        string_buffer_free(&buffer);
7523 7524 7525 7526 7527
        return 0;
    }
    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));

    shader_addline(&buffer, "!!ARBfp1.0\n");
7528 7529 7530 7531

    if (type->use_color_key)
    {
        shader_addline(&buffer, "TEMP color;\n");
7532 7533 7534
        shader_addline(&buffer, "TEMP less, greater;\n");
        shader_addline(&buffer, "PARAM color_key_low = program.local[%u];\n", ARBFP_BLIT_PARAM_COLOR_KEY_LOW);
        shader_addline(&buffer, "PARAM color_key_high = program.local[%u];\n", ARBFP_BLIT_PARAM_COLOR_KEY_HIGH);
7535
        shader_addline(&buffer, "TEX color, fragment.texcoord[0], texture[0], %s;\n", tex_target);
7536 7537 7538 7539 7540 7541
        shader_addline(&buffer, "SLT less, color, color_key_low;\n"); /* below low key */
        shader_addline(&buffer, "SGE greater, color, color_key_high;\n"); /* above high key */
        shader_addline(&buffer, "ADD less, less, greater;\n"); /* or */
        shader_addline(&buffer, "DP4 less.b, less, less;\n"); /* on any channel */
        shader_addline(&buffer, "SGE less, -less.b, 0.0;\n"); /* logical not */
        shader_addline(&buffer, "KIL -less;\n"); /* discard if true */
7542 7543 7544 7545 7546 7547 7548
        shader_addline(&buffer, "MOV result.color, color;\n");
    }
    else
    {
        shader_addline(&buffer, "TEX result.color, fragment.texcoord[0], texture[0], %s;\n", tex_target);
    }

7549 7550
    shader_addline(&buffer, "END\n");

7551
    shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer.buffer);
7552

7553
    string_buffer_free(&buffer);
7554 7555 7556 7557

    return shader;
}

7558
/* Context activation is done by the caller. */
7559
static HRESULT arbfp_blit_set(struct wined3d_arbfp_blitter *blitter, struct wined3d_context_gl *context_gl,
7560
        const struct wined3d_texture_gl *texture_gl, unsigned int sub_resource_idx,
7561
        const struct wined3d_color_key *color_key)
7562
{
7563
    const struct wined3d_gl_info *gl_info = context_gl->gl_info;
7564
    enum complex_fixup fixup;
7565 7566 7567
    struct wine_rb_entry *entry;
    struct arbfp_blit_type type;
    struct arbfp_blit_desc *desc;
7568
    struct wined3d_color float_color_key[2];
7569
    struct wined3d_vec4 size;
7570
    unsigned int level;
7571 7572
    GLuint shader;

7573 7574 7575
    level = sub_resource_idx % texture_gl->t.level_count;
    size.x = wined3d_texture_get_level_pow2_width(&texture_gl->t, level);
    size.y = wined3d_texture_get_level_pow2_height(&texture_gl->t, level);
7576 7577
    size.z = 1.0f;
    size.w = 1.0f;
7578

7579 7580
    if (is_complex_fixup(texture_gl->t.resource.format->color_fixup))
        fixup = get_complex_fixup(texture_gl->t.resource.format->color_fixup);
7581 7582
    else
        fixup = COMPLEX_FIXUP_NONE;
7583

7584
    switch (texture_gl->target)
7585 7586
    {
        case GL_TEXTURE_1D:
7587
            type.res_type = WINED3D_GL_RES_TYPE_TEX_1D;
7588 7589 7590
            break;

        case GL_TEXTURE_2D:
7591
            type.res_type = WINED3D_GL_RES_TYPE_TEX_2D;
7592 7593 7594
            break;

        case GL_TEXTURE_3D:
7595
            type.res_type = WINED3D_GL_RES_TYPE_TEX_3D;
7596 7597 7598
            break;

        case GL_TEXTURE_CUBE_MAP_ARB:
7599
            type.res_type = WINED3D_GL_RES_TYPE_TEX_CUBE;
7600 7601 7602
            break;

        case GL_TEXTURE_RECTANGLE_ARB:
7603
            type.res_type = WINED3D_GL_RES_TYPE_TEX_RECT;
7604 7605 7606
            break;

        default:
7607
            ERR("Unexpected GL texture type %#x.\n", texture_gl->target);
7608
            type.res_type = WINED3D_GL_RES_TYPE_TEX_2D;
7609
    }
7610
    type.fixup = fixup;
7611
    type.use_color_key = !!color_key;
7612
    type.padding = 0;
7613

7614
    if ((entry = wine_rb_get(&blitter->shaders, &type)))
7615
    {
7616 7617 7618 7619 7620 7621 7622
        desc = WINE_RB_ENTRY_VALUE(entry, struct arbfp_blit_desc, entry);
        shader = desc->shader;
    }
    else
    {
        switch (fixup)
        {
7623
            case COMPLEX_FIXUP_NONE:
7624
                if (!is_identity_fixup(texture_gl->t.resource.format->color_fixup))
7625
                    FIXME("Implement support for sign or swizzle fixups.\n");
7626
                shader = arbfp_gen_plain_shader(gl_info, &type);
7627 7628
                break;

7629
            case COMPLEX_FIXUP_P8:
7630
                shader = gen_p8_shader(gl_info, &type);
7631
                break;
7632

7633 7634 7635 7636
            case COMPLEX_FIXUP_YUY2:
            case COMPLEX_FIXUP_UYVY:
            case COMPLEX_FIXUP_YV12:
            case COMPLEX_FIXUP_NV12:
7637
                shader = gen_yuv_shader(gl_info, &type);
7638
                break;
7639 7640 7641 7642

            default:
                FIXME("Unsupported fixup %#x.\n", fixup);
                return E_NOTIMPL;
7643
        }
7644

7645 7646
        if (!shader)
        {
7647
            ERR("Failed to get shader for fixup %#x.\n", fixup);
7648
            return E_NOTIMPL;
7649 7650
        }

7651
        if (!(desc = heap_alloc(sizeof(*desc))))
7652 7653
            goto err_out;

7654
        desc->type = type;
7655
        desc->shader = shader;
7656
        if (wine_rb_put(&blitter->shaders, &desc->type, &desc->entry) == -1)
7657 7658 7659 7660 7661 7662 7663
        {
err_out:
            ERR("Out of memory\n");
            GL_EXTCALL(glDeleteProgramsARB(1, &shader));
            checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader))");
            GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0));
            checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0)");
7664
            heap_free(desc);
7665 7666
            return E_OUTOFMEMORY;
        }
7667 7668
    }

7669
    if (fixup == COMPLEX_FIXUP_P8)
7670
        arbfp_blitter_upload_palette(blitter, texture_gl, context_gl);
7671

7672
    gl_info->gl_ops.gl.p_glEnable(GL_FRAGMENT_PROGRAM_ARB);
7673 7674 7675
    checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)");
    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
    checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");
7676
    GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARBFP_BLIT_PARAM_SIZE, &size.x));
7677
    checkGLcall("glProgramLocalParameter4fvARB");
7678 7679
    if (type.use_color_key)
    {
7680
        wined3d_format_get_float_color_key(texture_gl->t.resource.format, color_key, float_color_key);
7681 7682
        GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
                ARBFP_BLIT_PARAM_COLOR_KEY_LOW, &float_color_key[0].r));
7683
        GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
7684
                ARBFP_BLIT_PARAM_COLOR_KEY_HIGH, &float_color_key[1].r));
7685 7686
        checkGLcall("glProgramLocalParameter4fvARB");
    }
7687 7688 7689 7690

    return WINED3D_OK;
}

7691
/* Context activation is done by the caller. */
7692 7693
static void arbfp_blit_unset(const struct wined3d_gl_info *gl_info)
{
7694
    gl_info->gl_ops.gl.p_glDisable(GL_FRAGMENT_PROGRAM_ARB);
7695 7696 7697
    checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
}

7698 7699 7700
/* Report whether the ARB fragment program blitter can perform the given
 * blit operation between the two resources.
 *
 * Only 2D texture sources are accepted. A raw blit between identical
 * formats is treated as a depth blit or a colour blit depending on whether
 * the destination format has depth/stencil bits. Returns TRUE if the blit
 * can be handled here, FALSE otherwise. */
static BOOL arbfp_blit_supported(enum wined3d_blit_op blit_op, const struct wined3d_context *context,
        const struct wined3d_resource *src_resource, DWORD src_location,
        const struct wined3d_resource *dst_resource, DWORD dst_location)
{
    const struct wined3d_format *src_format = src_resource->format;
    const struct wined3d_format *dst_format = dst_resource->format;
    enum complex_fixup src_fixup;
    BOOL decompressing;

    if (src_resource->type != WINED3D_RTYPE_TEXTURE_2D)
        return FALSE;

    if (blit_op == WINED3D_BLIT_OP_RAW_BLIT && dst_format->id == src_format->id)
    {
        blit_op = (dst_format->depth_size || dst_format->stencil_size)
                ? WINED3D_BLIT_OP_DEPTH_BLIT : WINED3D_BLIT_OP_COLOR_BLIT;
    }

    if (blit_op == WINED3D_BLIT_OP_COLOR_BLIT_CKEY)
    {
        if (!context->d3d_info->shader_color_key)
        {
            /* The conversion modifies the alpha channel so the color key might no longer match. */
            TRACE("Color keying not supported with converted textures.\n");
            return FALSE;
        }
    }
    else if (blit_op != WINED3D_BLIT_OP_COLOR_BLIT_ALPHATEST
            && blit_op != WINED3D_BLIT_OP_COLOR_BLIT)
    {
        TRACE("Unsupported blit_op=%d\n", blit_op);
        return FALSE;
    }

    /* Compressed source to uncompressed destination; this case is exempt
     * from the GPU access requirement below. */
    decompressing = (src_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & WINED3DFMT_FLAG_COMPRESSED)
            && !(dst_format->flags[WINED3D_GL_RES_TYPE_TEX_2D] & WINED3DFMT_FLAG_COMPRESSED);
    if (!decompressing
            && !(src_resource->access & dst_resource->access & WINED3D_RESOURCE_ACCESS_GPU))
        return FALSE;

    src_fixup = get_complex_fixup(src_format->color_fixup);
    if (TRACE_ON(d3d_shader) && TRACE_ON(d3d))
    {
        TRACE("Checking support for fixup:\n");
        dump_color_fixup_desc(src_format->color_fixup);
    }

    /* A destination fixup is only tolerated for a same-format blit to the
     * drawable. */
    if (!is_identity_fixup(dst_format->color_fixup)
            && (dst_format->id != src_format->id || dst_location != WINED3D_LOCATION_DRAWABLE))
    {
        TRACE("Destination fixups are not supported\n");
        return FALSE;
    }

    if (is_identity_fixup(src_format->color_fixup))
    {
        TRACE("[OK]\n");
        return TRUE;
    }

    /* Of the non-identity source fixups, only complex ones are handled. */
    if (!is_complex_fixup(src_format->color_fixup))
    {
        if (wined3d_settings.offscreen_rendering_mode == ORM_BACKBUFFER)
        {
            WARN("Claiming fixup support because of ORM_BACKBUFFER.\n");
            return TRUE;
        }

        TRACE("[FAILED]\n");
        return FALSE;
    }

    switch (src_fixup)
    {
        case COMPLEX_FIXUP_YUY2:
        case COMPLEX_FIXUP_UYVY:
        case COMPLEX_FIXUP_YV12:
        case COMPLEX_FIXUP_NV12:
        case COMPLEX_FIXUP_P8:
            TRACE("[OK]\n");
            return TRUE;

        default:
            FIXME("Unsupported YUV fixup %#x\n", src_fixup);
            TRACE("[FAILED]\n");
            return FALSE;
    }
}

7791
/* Blit from a source sub-resource to a destination sub-resource by drawing
 * a textured quad with an ARB fragment program bound.
 *
 * If arbfp_blit_supported() rejects the operation, the request is forwarded
 * to the next blitter in the chain, if any. In every other case the
 * function returns dst_location. */
static DWORD arbfp_blitter_blit(struct wined3d_blitter *blitter, enum wined3d_blit_op op,
        struct wined3d_context *context, struct wined3d_texture *src_texture, unsigned int src_sub_resource_idx,
        DWORD src_location, const RECT *src_rect, struct wined3d_texture *dst_texture,
        unsigned int dst_sub_resource_idx, DWORD dst_location, const RECT *dst_rect,
        const struct wined3d_color_key *color_key, enum wined3d_texture_filter_type filter)
{
    struct wined3d_texture_gl *src_texture_gl = wined3d_texture_gl(src_texture);
    struct wined3d_context_gl *context_gl = wined3d_context_gl(context);
    struct wined3d_device *device = dst_texture->resource.device;
    struct wined3d_texture *staging = NULL;
    struct wined3d_arbfp_blitter *arbfp;
    struct wined3d_color_key alpha_key;
    struct wined3d_blitter *fallback;
    RECT flipped_src, drawable_dst;
    unsigned int level;

    TRACE("blitter %p, op %#x, context %p, src_texture %p, src_sub_resource_idx %u, src_location %s, src_rect %s, "
            "dst_texture %p, dst_sub_resource_idx %u, dst_location %s, dst_rect %s, colour_key %p, filter %s.\n",
            blitter, op, context, src_texture, src_sub_resource_idx, wined3d_debug_location(src_location),
            wine_dbgstr_rect(src_rect), dst_texture, dst_sub_resource_idx, wined3d_debug_location(dst_location),
            wine_dbgstr_rect(dst_rect), color_key, debug_d3dtexturefiltertype(filter));

    if (!arbfp_blit_supported(op, context, &src_texture->resource, src_location,
            &dst_texture->resource, dst_location))
    {
        fallback = blitter->next;
        if (!fallback)
        {
            ERR("No blitter to handle blit op %#x.\n", op);
            return dst_location;
        }

        TRACE("Forwarding to blitter %p.\n", fallback);
        return fallback->ops->blitter_blit(fallback, op, context, src_texture, src_sub_resource_idx,
                src_location, src_rect, dst_texture, dst_sub_resource_idx, dst_location, dst_rect,
                color_key, filter);
    }

    arbfp = CONTAINING_RECORD(blitter, struct wined3d_arbfp_blitter, blitter);

    if (!(src_texture->resource.access & WINED3D_RESOURCE_ACCESS_GPU))
    {
        struct wined3d_resource_desc desc;
        struct wined3d_box upload_box;
        HRESULT hr;

        TRACE("Source texture is not GPU accessible, creating a staging texture.\n");

        /* Describe a single-level GPU texture matching the source level. */
        level = src_sub_resource_idx % src_texture->level_count;
        desc.resource_type = WINED3D_RTYPE_TEXTURE_2D;
        desc.format = src_texture->resource.format->id;
        desc.multisample_type = src_texture->resource.multisample_type;
        desc.multisample_quality = src_texture->resource.multisample_quality;
        desc.usage = WINED3DUSAGE_PRIVATE;
        desc.bind_flags = 0;
        desc.access = WINED3D_RESOURCE_ACCESS_GPU;
        desc.width = wined3d_texture_get_level_width(src_texture, level);
        desc.height = wined3d_texture_get_level_height(src_texture, level);
        desc.depth = 1;
        desc.size = 0;

        hr = wined3d_texture_create(device, &desc, 1, 1, 0,
                NULL, NULL, &wined3d_null_parent_ops, &staging);
        if (FAILED(hr))
        {
            ERR("Failed to create staging texture, hr %#x.\n", hr);
            return dst_location;
        }

        wined3d_box_set(&upload_box, 0, 0, desc.width, desc.height, 0, desc.depth);
        wined3d_texture_upload_from_texture(staging, 0, 0, 0, 0,
                src_texture, src_sub_resource_idx, &upload_box);

        /* From here on, sample from the staging copy instead. */
        src_texture = staging;
        src_texture_gl = wined3d_texture_gl(src_texture);
        src_sub_resource_idx = 0;
    }
    else if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
            && (src_texture->sub_resources[src_sub_resource_idx].locations
            & (WINED3D_LOCATION_TEXTURE_RGB | WINED3D_LOCATION_DRAWABLE)) == WINED3D_LOCATION_DRAWABLE
            && !wined3d_resource_is_offscreen(&src_texture->resource))
    {
        /* Without FBO blits transferring from the drawable to the texture is
         * expensive, because we have to flip the data in sysmem. Since we can
         * flip in the blitter, we don't actually need that flip anyway. So we
         * use the surface's texture as scratch texture, and flip the source
         * rectangle instead. */
        texture2d_load_fb_texture(src_texture_gl, src_sub_resource_idx, FALSE, context);

        flipped_src = *src_rect;
        level = src_sub_resource_idx % src_texture->level_count;
        flipped_src.top = wined3d_texture_get_level_height(src_texture, level) - flipped_src.top;
        flipped_src.bottom = wined3d_texture_get_level_height(src_texture, level) - flipped_src.bottom;
        src_rect = &flipped_src;
    }
    else
    {
        wined3d_texture_load(src_texture, context, FALSE);
    }

    wined3d_context_gl_apply_ffp_blit_state(context_gl, device);

    if (dst_location == WINED3D_LOCATION_DRAWABLE)
    {
        /* Work on a translated copy of the destination rectangle. */
        drawable_dst = *dst_rect;
        wined3d_texture_translate_drawable_coords(dst_texture, context_gl->window, &drawable_dst);
        dst_rect = &drawable_dst;
    }

    if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
    {
        GLenum draw_buffer;

        if (dst_location == WINED3D_LOCATION_DRAWABLE)
        {
            TRACE("Destination texture %p is onscreen.\n", dst_texture);
            draw_buffer = wined3d_texture_get_gl_buffer(dst_texture);
        }
        else
        {
            TRACE("Destination texture %p is offscreen.\n", dst_texture);
            draw_buffer = GL_COLOR_ATTACHMENT0;
        }

        wined3d_context_gl_apply_fbo_state_blit(context_gl, GL_DRAW_FRAMEBUFFER,
                &dst_texture->resource, dst_sub_resource_idx, NULL, 0, dst_location);
        wined3d_context_gl_set_draw_buffer(context_gl, draw_buffer);
        wined3d_context_gl_check_fbo_status(context_gl, GL_DRAW_FRAMEBUFFER);
        context_invalidate_state(context, STATE_FRAMEBUFFER);
    }

    if (op == WINED3D_BLIT_OP_COLOR_BLIT_ALPHATEST)
    {
        /* Express the alpha test as a colour key whose range runs from 0 to
         * a value with every bit set except the format's alpha bits. */
        const struct wined3d_format *fmt = src_texture->resource.format;
        unsigned int alpha_mask = ((1u << fmt->alpha_size) - 1) << fmt->alpha_offset;

        alpha_key.color_space_low_value = 0;
        alpha_key.color_space_high_value = ~alpha_mask;
        color_key = &alpha_key;
    }

    /* The result is not checked here; the quad is drawn regardless. */
    arbfp_blit_set(arbfp, context_gl, src_texture_gl, src_sub_resource_idx, color_key);

    /* Draw a textured quad */
    wined3d_context_gl_draw_textured_quad(context_gl, src_texture_gl,
            src_sub_resource_idx, src_rect, dst_rect, filter);

    /* Leave the opengl state valid for blitting */
    arbfp_blit_unset(context_gl->gl_info);

    /* Flush when the destination is the swapchain's front buffer. */
    if (dst_texture->swapchain && (dst_texture->swapchain->front_buffer == dst_texture))
        context_gl->gl_info->gl_ops.gl.p_glFlush();

    if (staging)
        wined3d_texture_decref(staging);

    return dst_location;
}

7945
/* This blitter does not implement clears itself; the request is passed
 * unchanged to the next blitter in the chain, and dropped if there is
 * none. */
static void arbfp_blitter_clear(struct wined3d_blitter *blitter, struct wined3d_device *device,
        unsigned int rt_count, const struct wined3d_fb_state *fb, unsigned int rect_count, const RECT *clear_rects,
        const RECT *draw_rect, DWORD flags, const struct wined3d_color *colour, float depth, DWORD stencil)
{
    struct wined3d_blitter *successor = blitter->next;

    if (!successor)
        return;

    successor->ops->blitter_clear(successor, device, rt_count, fb, rect_count,
            clear_rects, draw_rect, flags, colour, depth, stencil);
}

7956
static const struct wined3d_blitter_ops arbfp_blitter_ops =
7957
{
7958 7959 7960
    arbfp_blitter_destroy,
    arbfp_blitter_clear,
    arbfp_blitter_blit,
7961
};
7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974

void wined3d_arbfp_blitter_create(struct wined3d_blitter **next, const struct wined3d_device *device)
{
    const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
    struct wined3d_arbfp_blitter *blitter;

    if (device->shader_backend != &arb_program_shader_backend
            && device->shader_backend != &glsl_shader_backend)
        return;

    if (!gl_info->supported[ARB_FRAGMENT_PROGRAM])
        return;

7975 7976 7977
    if (!gl_info->supported[WINED3D_GL_LEGACY_CONTEXT])
        return;

7978
    if (!(blitter = heap_alloc(sizeof(*blitter))))
7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991
    {
        ERR("Failed to allocate blitter.\n");
        return;
    }

    TRACE("Created blitter %p.\n", blitter);

    blitter->blitter.ops = &arbfp_blitter_ops;
    blitter->blitter.next = *next;
    wine_rb_init(&blitter->shaders, arbfp_blit_type_compare);
    blitter->palette_texture = 0;
    *next = &blitter->blitter;
}