/*
 * Copyright 2002-2003 Jason Edmeades
 * Copyright 2002-2003 Raphael Junqueira
 * Copyright 2004 Christian Costa
 * Copyright 2005 Oliver Stieber
 * Copyright 2006 Ivan Gyurdiev
 * Copyright 2007-2008, 2013 Stefan Dösinger for CodeWeavers
 * Copyright 2009-2011 Henri Verbeet for CodeWeavers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include "config.h"
26
#include "wine/port.h"
27 28

#include <stdio.h>
29
#include <string.h>
30 31 32 33

#include "wined3d_private.h"

WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
34

35 36 37 38 39 40 41
/* Constant vectors named for sRGB handling. The per-component roles noted on
 * each row come from the original annotations; presumably these are the
 * gamma-encode constants consumed by the shader backends (confirm against
 * the users of this table). */
const struct wined3d_vec4 wined3d_srgb_const[] =
{
    {0.41666f, 1.055f, 0.055f, 12.92f},     /* pow, mul_high, sub_high, mul_low */
    {0.0031308f, 0.0f, 0.0f, 0.0f},         /* cmp */
};
42

43
/* Printable instruction names, indexed by enum WINED3D_SHADER_INSTRUCTION_HANDLER
 * value (see debug_d3dshaderinstructionhandler()). The comment in front of
 * each string names the enumerator the slot belongs to, so the order of the
 * entries must follow the order of the enum.
 *
 * A name does not always mirror its enumerator (e.g. CONTINUEP is
 * "continuec", SAMPLE_GRAD is "sample_d", DCL_VERTICES_OUT is
 * "dcl_maxOutputVertexCount"); presumably the strings follow the D3D
 * assembly mnemonics. */
static const char * const shader_opcode_names[] =
{
    /* WINED3DSIH_ABS                              */ "abs",
    /* WINED3DSIH_ADD                              */ "add",
    /* WINED3DSIH_AND                              */ "and",
    /* WINED3DSIH_ATOMIC_AND                       */ "atomic_and",
    /* WINED3DSIH_ATOMIC_CMP_STORE                 */ "atomic_cmp_store",
    /* WINED3DSIH_ATOMIC_IADD                      */ "atomic_iadd",
    /* WINED3DSIH_ATOMIC_IMAX                      */ "atomic_imax",
    /* WINED3DSIH_ATOMIC_IMIN                      */ "atomic_imin",
    /* WINED3DSIH_ATOMIC_OR                        */ "atomic_or",
    /* WINED3DSIH_ATOMIC_UMAX                      */ "atomic_umax",
    /* WINED3DSIH_ATOMIC_UMIN                      */ "atomic_umin",
    /* WINED3DSIH_ATOMIC_XOR                       */ "atomic_xor",
    /* WINED3DSIH_BEM                              */ "bem",
    /* WINED3DSIH_BFI                              */ "bfi",
    /* WINED3DSIH_BFREV                            */ "bfrev",
    /* WINED3DSIH_BREAK                            */ "break",
    /* WINED3DSIH_BREAKC                           */ "breakc",
    /* WINED3DSIH_BREAKP                           */ "breakp",
    /* WINED3DSIH_BUFINFO                          */ "bufinfo",
    /* WINED3DSIH_CALL                             */ "call",
    /* WINED3DSIH_CALLNZ                           */ "callnz",
    /* WINED3DSIH_CASE                             */ "case",
    /* WINED3DSIH_CMP                              */ "cmp",
    /* WINED3DSIH_CND                              */ "cnd",
    /* WINED3DSIH_CONTINUE                         */ "continue",
    /* WINED3DSIH_CONTINUEP                        */ "continuec",
    /* WINED3DSIH_COUNTBITS                        */ "countbits",
    /* WINED3DSIH_CRS                              */ "crs",
    /* WINED3DSIH_CUT                              */ "cut",
    /* WINED3DSIH_CUT_STREAM                       */ "cut_stream",
    /* WINED3DSIH_DCL                              */ "dcl",
    /* WINED3DSIH_DCL_CONSTANT_BUFFER              */ "dcl_constantBuffer",
    /* WINED3DSIH_DCL_FUNCTION_BODY                */ "dcl_function_body",
    /* WINED3DSIH_DCL_FUNCTION_TABLE               */ "dcl_function_table",
    /* WINED3DSIH_DCL_GLOBAL_FLAGS                 */ "dcl_globalFlags",
    /* WINED3DSIH_DCL_GS_INSTANCES                 */ "dcl_gs_instances",
    /* WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT */ "dcl_hs_fork_phase_instance_count",
    /* WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */ "dcl_hs_join_phase_instance_count",
    /* WINED3DSIH_DCL_HS_MAX_TESSFACTOR            */ "dcl_hs_max_tessfactor",
    /* WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER    */ "dcl_immediateConstantBuffer",
    /* WINED3DSIH_DCL_INDEX_RANGE                  */ "dcl_index_range",
    /* WINED3DSIH_DCL_INDEXABLE_TEMP               */ "dcl_indexableTemp",
    /* WINED3DSIH_DCL_INPUT                        */ "dcl_input",
    /* WINED3DSIH_DCL_INPUT_CONTROL_POINT_COUNT    */ "dcl_input_control_point_count",
    /* WINED3DSIH_DCL_INPUT_PRIMITIVE              */ "dcl_inputPrimitive",
    /* WINED3DSIH_DCL_INPUT_PS                     */ "dcl_input_ps",
    /* WINED3DSIH_DCL_INPUT_PS_SGV                 */ "dcl_input_ps_sgv",
    /* WINED3DSIH_DCL_INPUT_PS_SIV                 */ "dcl_input_ps_siv",
    /* WINED3DSIH_DCL_INPUT_SGV                    */ "dcl_input_sgv",
    /* WINED3DSIH_DCL_INPUT_SIV                    */ "dcl_input_siv",
    /* WINED3DSIH_DCL_INTERFACE                    */ "dcl_interface",
    /* WINED3DSIH_DCL_OUTPUT                       */ "dcl_output",
    /* WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT   */ "dcl_output_control_point_count",
    /* WINED3DSIH_DCL_OUTPUT_SIV                   */ "dcl_output_siv",
    /* WINED3DSIH_DCL_OUTPUT_TOPOLOGY              */ "dcl_outputTopology",
    /* WINED3DSIH_DCL_RESOURCE_RAW                 */ "dcl_resource_raw",
    /* WINED3DSIH_DCL_RESOURCE_STRUCTURED          */ "dcl_resource_structured",
    /* WINED3DSIH_DCL_SAMPLER                      */ "dcl_sampler",
    /* WINED3DSIH_DCL_STREAM                       */ "dcl_stream",
    /* WINED3DSIH_DCL_TEMPS                        */ "dcl_temps",
    /* WINED3DSIH_DCL_TESSELLATOR_DOMAIN           */ "dcl_tessellator_domain",
    /* WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE */ "dcl_tessellator_output_primitive",
    /* WINED3DSIH_DCL_TESSELLATOR_PARTITIONING     */ "dcl_tessellator_partitioning",
    /* WINED3DSIH_DCL_TGSM_RAW                     */ "dcl_tgsm_raw",
    /* WINED3DSIH_DCL_TGSM_STRUCTURED              */ "dcl_tgsm_structured",
    /* WINED3DSIH_DCL_THREAD_GROUP                 */ "dcl_thread_group",
    /* WINED3DSIH_DCL_UAV_RAW                      */ "dcl_uav_raw",
    /* WINED3DSIH_DCL_UAV_STRUCTURED               */ "dcl_uav_structured",
    /* WINED3DSIH_DCL_UAV_TYPED                    */ "dcl_uav_typed",
    /* WINED3DSIH_DCL_VERTICES_OUT                 */ "dcl_maxOutputVertexCount",
    /* WINED3DSIH_DEF                              */ "def",
    /* WINED3DSIH_DEFAULT                          */ "default",
    /* WINED3DSIH_DEFB                             */ "defb",
    /* WINED3DSIH_DEFI                             */ "defi",
    /* WINED3DSIH_DIV                              */ "div",
    /* WINED3DSIH_DP2                              */ "dp2",
    /* WINED3DSIH_DP2ADD                           */ "dp2add",
    /* WINED3DSIH_DP3                              */ "dp3",
    /* WINED3DSIH_DP4                              */ "dp4",
    /* WINED3DSIH_DST                              */ "dst",
    /* WINED3DSIH_DSX                              */ "dsx",
    /* WINED3DSIH_DSX_COARSE                       */ "deriv_rtx_coarse",
    /* WINED3DSIH_DSX_FINE                         */ "deriv_rtx_fine",
    /* WINED3DSIH_DSY                              */ "dsy",
    /* WINED3DSIH_DSY_COARSE                       */ "deriv_rty_coarse",
    /* WINED3DSIH_DSY_FINE                         */ "deriv_rty_fine",
    /* WINED3DSIH_ELSE                             */ "else",
    /* WINED3DSIH_EMIT                             */ "emit",
    /* WINED3DSIH_EMIT_STREAM                      */ "emit_stream",
    /* WINED3DSIH_ENDIF                            */ "endif",
    /* WINED3DSIH_ENDLOOP                          */ "endloop",
    /* WINED3DSIH_ENDREP                           */ "endrep",
    /* WINED3DSIH_ENDSWITCH                        */ "endswitch",
    /* WINED3DSIH_EQ                               */ "eq",
    /* WINED3DSIH_EVAL_SAMPLE_INDEX                */ "eval_sample_index",
    /* WINED3DSIH_EXP                              */ "exp",
    /* WINED3DSIH_EXPP                             */ "expp",
    /* WINED3DSIH_F16TOF32                         */ "f16tof32",
    /* WINED3DSIH_F32TOF16                         */ "f32tof16",
    /* WINED3DSIH_FCALL                            */ "fcall",
    /* WINED3DSIH_FIRSTBIT_HI                      */ "firstbit_hi",
    /* WINED3DSIH_FIRSTBIT_LO                      */ "firstbit_lo",
    /* WINED3DSIH_FIRSTBIT_SHI                     */ "firstbit_shi",
    /* WINED3DSIH_FRC                              */ "frc",
    /* WINED3DSIH_FTOI                             */ "ftoi",
    /* WINED3DSIH_FTOU                             */ "ftou",
    /* WINED3DSIH_GATHER4                          */ "gather4",
    /* WINED3DSIH_GATHER4_C                        */ "gather4_c",
    /* WINED3DSIH_GATHER4_PO                       */ "gather4_po",
    /* WINED3DSIH_GATHER4_PO_C                     */ "gather4_po_c",
    /* WINED3DSIH_GE                               */ "ge",
    /* WINED3DSIH_HS_CONTROL_POINT_PHASE           */ "hs_control_point_phase",
    /* WINED3DSIH_HS_DECLS                         */ "hs_decls",
    /* WINED3DSIH_HS_FORK_PHASE                    */ "hs_fork_phase",
    /* WINED3DSIH_HS_JOIN_PHASE                    */ "hs_join_phase",
    /* WINED3DSIH_IADD                             */ "iadd",
    /* WINED3DSIH_IBFE                             */ "ibfe",
    /* WINED3DSIH_IEQ                              */ "ieq",
    /* WINED3DSIH_IF                               */ "if",
    /* WINED3DSIH_IFC                              */ "ifc",
    /* WINED3DSIH_IGE                              */ "ige",
    /* WINED3DSIH_ILT                              */ "ilt",
    /* WINED3DSIH_IMAD                             */ "imad",
    /* WINED3DSIH_IMAX                             */ "imax",
    /* WINED3DSIH_IMIN                             */ "imin",
    /* WINED3DSIH_IMM_ATOMIC_ALLOC                 */ "imm_atomic_alloc",
    /* WINED3DSIH_IMM_ATOMIC_AND                   */ "imm_atomic_and",
    /* WINED3DSIH_IMM_ATOMIC_CMP_EXCH              */ "imm_atomic_cmp_exch",
    /* WINED3DSIH_IMM_ATOMIC_CONSUME               */ "imm_atomic_consume",
    /* WINED3DSIH_IMM_ATOMIC_EXCH                  */ "imm_atomic_exch",
    /* WINED3DSIH_IMM_ATOMIC_IADD                  */ "imm_atomic_iadd",
    /* WINED3DSIH_IMM_ATOMIC_IMAX                  */ "imm_atomic_imax",
    /* WINED3DSIH_IMM_ATOMIC_IMIN                  */ "imm_atomic_imin",
    /* WINED3DSIH_IMM_ATOMIC_OR                    */ "imm_atomic_or",
    /* WINED3DSIH_IMM_ATOMIC_UMAX                  */ "imm_atomic_umax",
    /* WINED3DSIH_IMM_ATOMIC_UMIN                  */ "imm_atomic_umin",
    /* WINED3DSIH_IMM_ATOMIC_XOR                   */ "imm_atomic_xor",
    /* WINED3DSIH_IMUL                             */ "imul",
    /* WINED3DSIH_INE                              */ "ine",
    /* WINED3DSIH_INEG                             */ "ineg",
    /* WINED3DSIH_ISHL                             */ "ishl",
    /* WINED3DSIH_ISHR                             */ "ishr",
    /* WINED3DSIH_ITOF                             */ "itof",
    /* WINED3DSIH_LABEL                            */ "label",
    /* WINED3DSIH_LD                               */ "ld",
    /* WINED3DSIH_LD2DMS                           */ "ld2dms",
    /* WINED3DSIH_LD_RAW                           */ "ld_raw",
    /* WINED3DSIH_LD_STRUCTURED                    */ "ld_structured",
    /* WINED3DSIH_LD_UAV_TYPED                     */ "ld_uav_typed",
    /* WINED3DSIH_LIT                              */ "lit",
    /* WINED3DSIH_LOD                              */ "lod",
    /* WINED3DSIH_LOG                              */ "log",
    /* WINED3DSIH_LOGP                             */ "logp",
    /* WINED3DSIH_LOOP                             */ "loop",
    /* WINED3DSIH_LRP                              */ "lrp",
    /* WINED3DSIH_LT                               */ "lt",
    /* WINED3DSIH_M3x2                             */ "m3x2",
    /* WINED3DSIH_M3x3                             */ "m3x3",
    /* WINED3DSIH_M3x4                             */ "m3x4",
    /* WINED3DSIH_M4x3                             */ "m4x3",
    /* WINED3DSIH_M4x4                             */ "m4x4",
    /* WINED3DSIH_MAD                              */ "mad",
    /* WINED3DSIH_MAX                              */ "max",
    /* WINED3DSIH_MIN                              */ "min",
    /* WINED3DSIH_MOV                              */ "mov",
    /* WINED3DSIH_MOVA                             */ "mova",
    /* WINED3DSIH_MOVC                             */ "movc",
    /* WINED3DSIH_MUL                              */ "mul",
    /* WINED3DSIH_NE                               */ "ne",
    /* WINED3DSIH_NOP                              */ "nop",
    /* WINED3DSIH_NOT                              */ "not",
    /* WINED3DSIH_NRM                              */ "nrm",
    /* WINED3DSIH_OR                               */ "or",
    /* WINED3DSIH_PHASE                            */ "phase",
    /* WINED3DSIH_POW                              */ "pow",
    /* WINED3DSIH_RCP                              */ "rcp",
    /* WINED3DSIH_REP                              */ "rep",
    /* WINED3DSIH_RESINFO                          */ "resinfo",
    /* WINED3DSIH_RET                              */ "ret",
    /* WINED3DSIH_RETP                             */ "retp",
    /* WINED3DSIH_ROUND_NE                         */ "round_ne",
    /* WINED3DSIH_ROUND_NI                         */ "round_ni",
    /* WINED3DSIH_ROUND_PI                         */ "round_pi",
    /* WINED3DSIH_ROUND_Z                          */ "round_z",
    /* WINED3DSIH_RSQ                              */ "rsq",
    /* WINED3DSIH_SAMPLE                           */ "sample",
    /* WINED3DSIH_SAMPLE_B                         */ "sample_b",
    /* WINED3DSIH_SAMPLE_C                         */ "sample_c",
    /* WINED3DSIH_SAMPLE_C_LZ                      */ "sample_c_lz",
    /* WINED3DSIH_SAMPLE_GRAD                      */ "sample_d",
    /* WINED3DSIH_SAMPLE_INFO                      */ "sample_info",
    /* WINED3DSIH_SAMPLE_LOD                       */ "sample_l",
    /* WINED3DSIH_SAMPLE_POS                       */ "sample_pos",
    /* WINED3DSIH_SETP                             */ "setp",
    /* WINED3DSIH_SGE                              */ "sge",
    /* WINED3DSIH_SGN                              */ "sgn",
    /* WINED3DSIH_SINCOS                           */ "sincos",
    /* WINED3DSIH_SLT                              */ "slt",
    /* WINED3DSIH_SQRT                             */ "sqrt",
    /* WINED3DSIH_STORE_RAW                        */ "store_raw",
    /* WINED3DSIH_STORE_STRUCTURED                 */ "store_structured",
    /* WINED3DSIH_STORE_UAV_TYPED                  */ "store_uav_typed",
    /* WINED3DSIH_SUB                              */ "sub",
    /* WINED3DSIH_SWAPC                            */ "swapc",
    /* WINED3DSIH_SWITCH                           */ "switch",
    /* WINED3DSIH_SYNC                             */ "sync",
    /* WINED3DSIH_TEX                              */ "texld",
    /* WINED3DSIH_TEXBEM                           */ "texbem",
    /* WINED3DSIH_TEXBEML                          */ "texbeml",
    /* WINED3DSIH_TEXCOORD                         */ "texcrd",
    /* WINED3DSIH_TEXDEPTH                         */ "texdepth",
    /* WINED3DSIH_TEXDP3                           */ "texdp3",
    /* WINED3DSIH_TEXDP3TEX                        */ "texdp3tex",
    /* WINED3DSIH_TEXKILL                          */ "texkill",
    /* WINED3DSIH_TEXLDD                           */ "texldd",
    /* WINED3DSIH_TEXLDL                           */ "texldl",
    /* WINED3DSIH_TEXM3x2DEPTH                     */ "texm3x2depth",
    /* WINED3DSIH_TEXM3x2PAD                       */ "texm3x2pad",
    /* WINED3DSIH_TEXM3x2TEX                       */ "texm3x2tex",
    /* WINED3DSIH_TEXM3x3                          */ "texm3x3",
    /* WINED3DSIH_TEXM3x3DIFF                      */ "texm3x3diff",
    /* WINED3DSIH_TEXM3x3PAD                       */ "texm3x3pad",
    /* WINED3DSIH_TEXM3x3SPEC                      */ "texm3x3spec",
    /* WINED3DSIH_TEXM3x3TEX                       */ "texm3x3tex",
    /* WINED3DSIH_TEXM3x3VSPEC                     */ "texm3x3vspec",
    /* WINED3DSIH_TEXREG2AR                        */ "texreg2ar",
    /* WINED3DSIH_TEXREG2GB                        */ "texreg2gb",
    /* WINED3DSIH_TEXREG2RGB                       */ "texreg2rgb",
    /* WINED3DSIH_UBFE                             */ "ubfe",
    /* WINED3DSIH_UDIV                             */ "udiv",
    /* WINED3DSIH_UGE                              */ "uge",
    /* WINED3DSIH_ULT                              */ "ult",
    /* WINED3DSIH_UMAX                             */ "umax",
    /* WINED3DSIH_UMIN                             */ "umin",
    /* WINED3DSIH_UMUL                             */ "umul",
    /* WINED3DSIH_USHR                             */ "ushr",
    /* WINED3DSIH_UTOF                             */ "utof",
    /* WINED3DSIH_XOR                              */ "xor",
};

285
/* Semantic name strings, indexed by enum wined3d_decl_usage value (see
 * shader_semantic_name_from_usage()). The comment in front of each string
 * names the enumerator the slot belongs to, so the entry order must follow
 * the enum. */
static const char * const semantic_names[] =
{
    /* WINED3D_DECL_USAGE_POSITION      */ "SV_POSITION",
    /* WINED3D_DECL_USAGE_BLEND_WEIGHT  */ "BLENDWEIGHT",
    /* WINED3D_DECL_USAGE_BLEND_INDICES */ "BLENDINDICES",
    /* WINED3D_DECL_USAGE_NORMAL        */ "NORMAL",
    /* WINED3D_DECL_USAGE_PSIZE         */ "PSIZE",
    /* WINED3D_DECL_USAGE_TEXCOORD      */ "TEXCOORD",
    /* WINED3D_DECL_USAGE_TANGENT       */ "TANGENT",
    /* WINED3D_DECL_USAGE_BINORMAL      */ "BINORMAL",
    /* WINED3D_DECL_USAGE_TESS_FACTOR   */ "TESSFACTOR",
    /* WINED3D_DECL_USAGE_POSITIONT     */ "POSITIONT",
    /* WINED3D_DECL_USAGE_COLOR         */ "COLOR",
    /* WINED3D_DECL_USAGE_FOG           */ "FOG",
    /* WINED3D_DECL_USAGE_DEPTH         */ "DEPTH",
    /* WINED3D_DECL_USAGE_SAMPLE        */ "SAMPLE",
};

303 304
static const struct
{
305
    enum wined3d_shader_input_sysval_semantic sysval_semantic;
306 307
    const char *sysval_name;
}
308
shader_input_sysval_semantic_names[] =
309
{
310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331
    {WINED3D_SIV_POSITION,                   "position"},
    {WINED3D_SIV_CLIP_DISTANCE,              "clip_distance"},
    {WINED3D_SIV_CULL_DISTANCE,              "cull_distance"},
    {WINED3D_SIV_RENDER_TARGET_ARRAY_INDEX,  "render_target_array_index"},
    {WINED3D_SIV_VIEWPORT_ARRAY_INDEX,       "viewport_array_index"},
    {WINED3D_SIV_VERTEX_ID,                  "vertex_id"},
    {WINED3D_SIV_INSTANCE_ID,                "instance_id"},
    {WINED3D_SIV_PRIMITIVE_ID,               "primitive_id"},
    {WINED3D_SIV_IS_FRONT_FACE,              "is_front_face"},
    {WINED3D_SIV_SAMPLE_INDEX,               "sample_index"},
    {WINED3D_SIV_QUAD_U0_TESS_FACTOR,        "finalQuadUeq0EdgeTessFactor"},
    {WINED3D_SIV_QUAD_V0_TESS_FACTOR,        "finalQuadVeq0EdgeTessFactor"},
    {WINED3D_SIV_QUAD_U1_TESS_FACTOR,        "finalQuadUeq1EdgeTessFactor"},
    {WINED3D_SIV_QUAD_V1_TESS_FACTOR,        "finalQuadVeq1EdgeTessFactor"},
    {WINED3D_SIV_QUAD_U_INNER_TESS_FACTOR,   "finalQuadUInsideTessFactor"},
    {WINED3D_SIV_QUAD_V_INNER_TESS_FACTOR,   "finalQuadVInsideTessFactor"},
    {WINED3D_SIV_TRIANGLE_U_TESS_FACTOR,     "finalTriUeq0EdgeTessFactor"},
    {WINED3D_SIV_TRIANGLE_V_TESS_FACTOR,     "finalTriVeq0EdgeTessFactor"},
    {WINED3D_SIV_TRIANGLE_W_TESS_FACTOR,     "finalTriWeq0EdgeTessFactor"},
    {WINED3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"},
    {WINED3D_SIV_LINE_DETAIL_TESS_FACTOR,    "finalLineDetailTessFactor"},
    {WINED3D_SIV_LINE_DENSITY_TESS_FACTOR,   "finalLineDensityTessFactor"},
332 333
};

334 335 336
/* Forward declaration so the function can be referenced ahead of its
 * definition. */
static void shader_dump_src_param(
        struct wined3d_string_buffer *buffer,
        const struct wined3d_shader_src_param *param,
        const struct wined3d_shader_version *shader_version);

337 338
/* Return a printable name for an instruction handler index.
 *
 * Indices covered by shader_opcode_names yield the corresponding table
 * entry; anything at or past the end of the table is formatted as
 * "UNRECOGNIZED(<hex value>)" through wine_dbg_sprintf(). */
const char *debug_d3dshaderinstructionhandler(enum WINED3D_SHADER_INSTRUCTION_HANDLER handler_idx)
{
    if (handler_idx >= ARRAY_SIZE(shader_opcode_names))
        return wine_dbg_sprintf("UNRECOGNIZED(%#x)", handler_idx);

    return shader_opcode_names[handler_idx];
}

345
static const char *shader_semantic_name_from_usage(enum wined3d_decl_usage usage)
346
{
347
    if (usage >= ARRAY_SIZE(semantic_names))
348 349 350 351 352 353 354 355
    {
        FIXME("Unrecognized usage %#x.\n", usage);
        return "UNRECOGNIZED";
    }

    return semantic_names[usage];
}

356
static enum wined3d_decl_usage shader_usage_from_semantic_name(const char *name)
357 358 359
{
    unsigned int i;

360
    for (i = 0; i < ARRAY_SIZE(semantic_names); ++i)
361
    {
362 363
        if (!strcmp(name, semantic_names[i]))
            return i;
364 365 366 367 368
    }

    return ~0U;
}

369 370 371 372 373 374 375 376 377 378 379
/* Translate a declaration usage into a system-value semantic. Only
 * WINED3D_DECL_USAGE_POSITION has a mapping (WINED3D_SV_POSITION); every
 * other usage yields 0. */
static enum wined3d_sysval_semantic shader_sysval_semantic_from_usage(enum wined3d_decl_usage usage)
{
    if (usage == WINED3D_DECL_USAGE_POSITION)
        return WINED3D_SV_POSITION;

    return 0;
}

380
/* Check whether "semantic_name" is exactly (case-sensitively) the semantic
 * name associated with "usage". An unrecognized usage is compared against
 * the string "UNRECOGNIZED", as returned by
 * shader_semantic_name_from_usage(). */
BOOL shader_match_semantic(const char *semantic_name, enum wined3d_decl_usage usage)
{
    return !strcmp(semantic_name, shader_semantic_name_from_usage(usage));
}

static void shader_signature_from_semantic(struct wined3d_shader_signature_element *e,
        const struct wined3d_shader_semantic *s)
{
    e->semantic_name = shader_semantic_name_from_usage(s->usage);
    e->semantic_idx = s->usage_idx;
390
    e->stream_idx = 0;
391
    e->sysval_semantic = shader_sysval_semantic_from_usage(s->usage);
392
    e->component_type = WINED3D_TYPE_FLOAT;
393
    e->register_idx = s->reg.reg.idx[0].offset;
394 395 396
    e->mask = s->reg.write_mask;
}

397
/* Fill signature element "e" from explicit usage / register information.
 *
 * Same result as shader_signature_from_semantic(), except that the usage,
 * usage index, register index and write mask are passed in directly. The
 * stream index is always 0 and the component type is always
 * WINED3D_TYPE_FLOAT. */
static void shader_signature_from_usage(struct wined3d_shader_signature_element *e,
        enum wined3d_decl_usage usage, UINT usage_idx, UINT reg_idx, DWORD write_mask)
{
    e->semantic_name = shader_semantic_name_from_usage(usage);
    e->semantic_idx = usage_idx;
    e->stream_idx = 0;
    e->sysval_semantic = shader_sysval_semantic_from_usage(usage);
    e->component_type = WINED3D_TYPE_FLOAT;
    e->register_idx = reg_idx;
    e->mask = write_mask;
}

409
static const struct wined3d_shader_frontend *shader_select_frontend(enum wined3d_shader_byte_code_format format)
410
{
411
    switch (format)
412
    {
413
        case WINED3D_SHADER_BYTE_CODE_FORMAT_SM1:
414 415
            return &sm1_shader_frontend;

416
        case WINED3D_SHADER_BYTE_CODE_FORMAT_SM4:
417 418 419
            return &sm4_shader_frontend;

        default:
420
            WARN("Invalid byte code format %#x specified.\n", format);
421 422 423 424
            return NULL;
    }
}

425
void string_buffer_clear(struct wined3d_string_buffer *buffer)
426 427
{
    buffer->buffer[0] = '\0';
428
    buffer->content_size = 0;
429 430
}

431
BOOL string_buffer_init(struct wined3d_string_buffer *buffer)
432
{
433
    buffer->buffer_size = 32;
434
    if (!(buffer->buffer = heap_alloc(buffer->buffer_size)))
435 436 437 438 439
    {
        ERR("Failed to allocate shader buffer memory.\n");
        return FALSE;
    }

440
    string_buffer_clear(buffer);
441 442 443
    return TRUE;
}

444
void string_buffer_free(struct wined3d_string_buffer *buffer)
445
{
446
    heap_free(buffer->buffer);
447 448
}

449
BOOL string_buffer_resize(struct wined3d_string_buffer *buffer, int rc)
450
{
451
    char *new_buffer;
452
    unsigned int new_buffer_size = buffer->buffer_size * 2;
453

454 455
    while (rc > 0 && (unsigned int)rc >= new_buffer_size - buffer->content_size)
        new_buffer_size *= 2;
456
    if (!(new_buffer = heap_realloc(buffer->buffer, new_buffer_size)))
457
    {
458 459 460 461 462 463 464 465
        ERR("Failed to grow buffer.\n");
        buffer->buffer[buffer->content_size] = '\0';
        return FALSE;
    }
    buffer->buffer = new_buffer;
    buffer->buffer_size = new_buffer_size;
    return TRUE;
}
466

467 468 469 470
/* Append formatted text at the end of the buffer's current contents.
 *
 * Returns 0 when the text fit, in which case content_size is advanced.
 * Otherwise content_size is left unchanged and vsnprintf()'s return value
 * is passed back: negative for C89-style implementations, or the required
 * length (>= the remaining space) for C99-style ones. Callers use that
 * value to grow the buffer and retry. */
int shader_vaddline(struct wined3d_string_buffer *buffer, const char *format, va_list args)
{
    unsigned int rem;
    int rc;

    rem = buffer->buffer_size - buffer->content_size;
    rc = vsnprintf(&buffer->buffer[buffer->content_size], rem, format, args);
    if (rc < 0 /* C89 */ || (unsigned int)rc >= rem /* C99 */)
        return rc;

    buffer->content_size += rc;
    return 0;
}

481
/* printf-style append to a string buffer, growing it as needed.
 *
 * The format is retried after every successful resize; the va_list is
 * restarted for each attempt because a previous attempt consumes it.
 * Returns 0 once the text has been appended, or -1 if the buffer could not
 * be grown. */
int shader_addline(struct wined3d_string_buffer *buffer, const char *format, ...)
{
    va_list args;
    int ret;

    for (;;)
    {
        va_start(args, format);
        ret = shader_vaddline(buffer, format, args);
        va_end(args);
        if (!ret)
            return ret;
        if (!string_buffer_resize(buffer, ret))
            return -1;
    }
}

498 499 500 501 502 503
struct wined3d_string_buffer *string_buffer_get(struct wined3d_string_buffer_list *list)
{
    struct wined3d_string_buffer *buffer;

    if (list_empty(&list->list))
    {
504
        buffer = heap_alloc(sizeof(*buffer));
505 506 507
        if (!buffer || !string_buffer_init(buffer))
        {
            ERR("Couldn't allocate buffer for temporary string.\n");
508
            heap_free(buffer);
509 510 511 512 513 514 515 516 517 518 519 520
            return NULL;
        }
    }
    else
    {
        buffer = LIST_ENTRY(list_head(&list->list), struct wined3d_string_buffer, entry);
        list_remove(&buffer->entry);
    }
    string_buffer_clear(buffer);
    return buffer;
}

521
/* Replace the buffer's contents with formatted text.
 *
 * The buffer is cleared first, then formatted through shader_vaddline(),
 * whose return value (0 on success, otherwise the value to feed to
 * string_buffer_resize()) is passed through. A NULL buffer is ignored and
 * reported as success (0). */
static int string_buffer_vsprintf(struct wined3d_string_buffer *buffer, const char *format, va_list args)
{
    if (!buffer)
        return 0;
    string_buffer_clear(buffer);
    return shader_vaddline(buffer, format, args);
}

/* printf-style overwrite of a string buffer, growing it as needed.
 *
 * Each attempt restarts the va_list and re-clears the buffer (inside
 * string_buffer_vsprintf()). The function returns silently both on success
 * and when the buffer cannot be grown; no status is reported to the
 * caller. */
void string_buffer_sprintf(struct wined3d_string_buffer *buffer, const char *format, ...)
{
    va_list args;
    int ret;

    for (;;)
    {
        va_start(args, format);
        ret = string_buffer_vsprintf(buffer, format, args);
        va_end(args);
        if (!ret)
            return;
        if (!string_buffer_resize(buffer, ret))
            return;
    }
}

/* Hand a buffer back to the list by linking it in at the head. A NULL
 * buffer is ignored. */
void string_buffer_release(struct wined3d_string_buffer_list *list, struct wined3d_string_buffer *buffer)
{
    if (buffer)
        list_add_head(&list->list, &buffer->entry);
}

/* Initialise the list head of a string buffer list. */
void string_buffer_list_init(struct wined3d_string_buffer_list *list)
{
    list_init(&list->list);
}

/* Destroy every buffer currently held by the list.
 *
 * For each entry both the character storage and the buffer structure are
 * freed. Entries are not unlinked one by one; the list head is
 * re-initialised once all of them are gone. The _SAFE iterator is needed
 * because the current entry is freed inside the loop. */
void string_buffer_list_cleanup(struct wined3d_string_buffer_list *list)
{
    struct wined3d_string_buffer *buffer, *buffer_next;

    LIST_FOR_EACH_ENTRY_SAFE(buffer, buffer_next, &list->list, struct wined3d_string_buffer, entry)
    {
        string_buffer_free(buffer);
        heap_free(buffer);
    }
    list_init(&list->list);
}

570 571
/* Convert floating point offset relative to a register file to an absolute
 * offset for float constants. */
572
static unsigned int shader_get_float_offset(enum wined3d_shader_register_type register_type, UINT register_idx)
573 574 575 576 577 578 579 580 581 582 583 584 585 586 587
{
    switch (register_type)
    {
        case WINED3DSPR_CONST: return register_idx;
        case WINED3DSPR_CONST2: return 2048 + register_idx;
        case WINED3DSPR_CONST3: return 4096 + register_idx;
        case WINED3DSPR_CONST4: return 6144 + register_idx;
        default:
            FIXME("Unsupported register type: %u.\n", register_type);
            return register_idx;
    }
}

static void shader_delete_constant_list(struct list *clist)
{
588
    struct wined3d_shader_lconst *constant, *constant_next;
589

590
    LIST_FOR_EACH_ENTRY_SAFE(constant, constant_next, clist, struct wined3d_shader_lconst, entry)
591
        heap_free(constant);
592 593 594
    list_init(clist);
}

595
static void shader_set_limits(struct wined3d_shader *shader)
596
{
597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614
    static const struct limits_entry
    {
        unsigned int min_version;
        unsigned int max_version;
        struct wined3d_shader_limits limits;
    }
    vs_limits[] =
    {
        /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
        {WINED3D_SHADER_VERSION(1, 0), WINED3D_SHADER_VERSION(1, 1), { 0,  0, 256,  0, 12,  0}},
        {WINED3D_SHADER_VERSION(2, 0), WINED3D_SHADER_VERSION(2, 1), { 0, 16, 256, 16, 12,  0}},
        /* DX10 cards on Windows advertise a D3D9 constant limit of 256
         * even though they are capable of supporting much more (GL
         * drivers advertise 1024). d3d9.dll and d3d8.dll clamp the
         * wined3d-advertised maximum. Clamp the constant limit for <= 3.0
         * shaders to 256. */
        {WINED3D_SHADER_VERSION(3, 0), WINED3D_SHADER_VERSION(3, 0), { 4, 16, 256, 16, 12,  0}},
        {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(4, 0), {16,  0,   0,  0, 16,  0}},
615
        {WINED3D_SHADER_VERSION(4, 1), WINED3D_SHADER_VERSION(5, 0), {16,  0,   0,  0, 32,  0}},
616 617
        {0}
    },
618 619 620 621 622
    hs_limits[] =
    {
        /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packet_input */
        {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16,  0,   0,  0, 32, 32}},
    },
623 624 625 626 627
    ds_limits[] =
    {
        /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packet_input */
        {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16,  0,   0,  0, 32, 32}},
    },
628 629 630 631
    gs_limits[] =
    {
        /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
        {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(4, 0), {16,  0,   0,  0, 32, 16}},
632
        {WINED3D_SHADER_VERSION(4, 1), WINED3D_SHADER_VERSION(5, 0), {16,  0,   0,  0, 32, 32}},
633 634 635 636 637 638 639
        {0}
    },
    ps_limits[] =
    {
        /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
        {WINED3D_SHADER_VERSION(1, 0), WINED3D_SHADER_VERSION(1, 3), { 4,  0,   8,  0,  0,  0}},
        {WINED3D_SHADER_VERSION(1, 4), WINED3D_SHADER_VERSION(1, 4), { 6,  0,   8,  0,  0,  0}},
640 641
        {WINED3D_SHADER_VERSION(2, 0), WINED3D_SHADER_VERSION(2, 0), {16,  0,  32,  0,  0,  0}},
        {WINED3D_SHADER_VERSION(2, 1), WINED3D_SHADER_VERSION(2, 1), {16, 16,  32, 16,  0,  0}},
642
        {WINED3D_SHADER_VERSION(3, 0), WINED3D_SHADER_VERSION(3, 0), {16, 16, 224, 16,  0, 10}},
643
        {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(5, 0), {16,  0,   0,  0,  0, 32}},
644
        {0}
645 646 647 648 649
    },
    cs_limits[] =
    {
        /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
        {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16,  0,   0,  0,  0,  0}},
650 651
    };
    const struct limits_entry *limits_array;
652 653
    DWORD shader_version = WINED3D_SHADER_VERSION(shader->reg_maps.shader_version.major,
            shader->reg_maps.shader_version.minor);
654
    int i = 0;
655

656
    switch (shader->reg_maps.shader_version.type)
657
    {
658 659 660 661 662
        default:
            FIXME("Unexpected shader type %u found.\n", shader->reg_maps.shader_version.type);
            /* Fall-through. */
        case WINED3D_SHADER_TYPE_VERTEX:
            limits_array = vs_limits;
663
            break;
664 665 666
        case WINED3D_SHADER_TYPE_HULL:
            limits_array = hs_limits;
            break;
667 668 669
        case WINED3D_SHADER_TYPE_DOMAIN:
            limits_array = ds_limits;
            break;
670 671
        case WINED3D_SHADER_TYPE_GEOMETRY:
            limits_array = gs_limits;
672
            break;
673 674
        case WINED3D_SHADER_TYPE_PIXEL:
            limits_array = ps_limits;
675
            break;
676 677 678
        case WINED3D_SHADER_TYPE_COMPUTE:
            limits_array = cs_limits;
            break;
679 680
    }

681
    while (limits_array[i].min_version && limits_array[i].min_version <= shader_version)
682
    {
683 684 685
        if (shader_version <= limits_array[i].max_version)
        {
            shader->limits = &limits_array[i].limits;
686
            break;
687 688
        }
        ++i;
689
    }
690
    if (!shader->limits)
691
    {
692 693 694 695
        FIXME("Unexpected shader version \"%u.%u\".\n",
                shader->reg_maps.shader_version.major,
                shader->reg_maps.shader_version.minor);
        shader->limits = &limits_array[max(0, i - 1)].limits;
696 697 698 699 700
    }
}

static BOOL shader_record_register_usage(struct wined3d_shader *shader, struct wined3d_shader_reg_maps *reg_maps,
        const struct wined3d_shader_register *reg, enum wined3d_shader_type shader_type, unsigned int constf_size)
701 702 703 704
{
    switch (reg->type)
    {
        case WINED3DSPR_TEXTURE: /* WINED3DSPR_ADDR */
705
            if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
706
                reg_maps->texcoord |= 1u << reg->idx[0].offset;
707
            else
708
                reg_maps->address |= 1u << reg->idx[0].offset;
709 710 711
            break;

        case WINED3DSPR_TEMP:
712
            reg_maps->temporary |= 1u << reg->idx[0].offset;
713 714 715
            break;

        case WINED3DSPR_INPUT:
716 717
            if (reg->idx[0].rel_addr)
                reg_maps->input_rel_addressing = 1;
718 719
            if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
            {
720 721 722 723
                /* If relative addressing is used, we must assume that all
                 * registers are used. Even if it is a construct like v3[aL],
                 * we can't assume that v0, v1 and v2 aren't read because aL
                 * can be negative. */
724
                if (reg->idx[0].rel_addr)
725
                    shader->u.ps.input_reg_used = ~0u;
726
                else
727
                    shader->u.ps.input_reg_used |= 1u << reg->idx[0].offset;
728
            }
729
            else
730
            {
731
                reg_maps->input_registers |= 1u << reg->idx[0].offset;
732
            }
733 734 735
            break;

        case WINED3DSPR_RASTOUT:
736 737
            if (reg->idx[0].offset == 1)
                reg_maps->fog = 1;
738 739
            if (reg->idx[0].offset == 2)
                reg_maps->point_size = 1;
740 741 742 743 744
            break;

        case WINED3DSPR_MISCTYPE:
            if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
            {
745 746 747 748
                if (!reg->idx[0].offset)
                    reg_maps->vpos = 1;
                else if (reg->idx[0].offset == 1)
                    reg_maps->usesfacing = 1;
749 750 751 752
            }
            break;

        case WINED3DSPR_CONST:
753
            if (reg->idx[0].rel_addr)
754
            {
755 756 757 758
                if (reg->idx[0].offset < reg_maps->min_rel_offset)
                    reg_maps->min_rel_offset = reg->idx[0].offset;
                if (reg->idx[0].offset > reg_maps->max_rel_offset)
                    reg_maps->max_rel_offset = reg->idx[0].offset;
759 760 761 762
                reg_maps->usesrelconstF = TRUE;
            }
            else
            {
763
                if (reg->idx[0].offset >= min(shader->limits->constant_float, constf_size))
764 765 766 767 768 769
                {
                    WARN("Shader using float constant %u which is not supported.\n", reg->idx[0].offset);
                    return FALSE;
                }
                else
                {
770
                    wined3d_insert_bits(reg_maps->constf, reg->idx[0].offset, 1, 0x1);
771
                }
772 773 774 775
            }
            break;

        case WINED3DSPR_CONSTINT:
776
            if (reg->idx[0].offset >= shader->limits->constant_int)
777 778 779 780 781 782
            {
                WARN("Shader using integer constant %u which is not supported.\n", reg->idx[0].offset);
                return FALSE;
            }
            else
            {
783
                reg_maps->integer_constants |= (1u << reg->idx[0].offset);
784
            }
785 786 787
            break;

        case WINED3DSPR_CONSTBOOL:
788
            if (reg->idx[0].offset >= shader->limits->constant_bool)
789 790 791 792 793 794
            {
                WARN("Shader using bool constant %u which is not supported.\n", reg->idx[0].offset);
                return FALSE;
            }
            else
            {
795
                reg_maps->boolean_constants |= (1u << reg->idx[0].offset);
796
            }
797 798 799
            break;

        case WINED3DSPR_COLOROUT:
800
            reg_maps->rt_mask |= (1u << reg->idx[0].offset);
801 802
            break;

803 804 805 806
        case WINED3DSPR_OUTCONTROLPOINT:
            reg_maps->vocp = 1;
            break;

807 808 809 810
        case WINED3DSPR_SAMPLEMASK:
            reg_maps->sample_mask = 1;
            break;

811
        default:
812 813
            TRACE("Not recording register of type %#x and [%#x][%#x].\n",
                    reg->type, reg->idx[0].offset, reg->idx[1].offset);
814 815
            break;
    }
816
    return TRUE;
817 818
}

819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835
static void shader_record_sample(struct wined3d_shader_reg_maps *reg_maps,
        unsigned int resource_idx, unsigned int sampler_idx, unsigned int bind_idx)
{
    struct wined3d_shader_sampler_map_entry *entries, *entry;
    struct wined3d_shader_sampler_map *map;
    unsigned int i;

    map = &reg_maps->sampler_map;
    entries = map->entries;
    for (i = 0; i < map->count; ++i)
    {
        if (entries[i].resource_idx == resource_idx && entries[i].sampler_idx == sampler_idx)
            return;
    }

    if (!map->size)
    {
836
        if (!(entries = heap_calloc(4, sizeof(*entries))))
837 838 839 840 841 842 843 844 845 846 847 848
        {
            ERR("Failed to allocate sampler map entries.\n");
            return;
        }
        map->size = 4;
        map->entries = entries;
    }
    else if (map->count == map->size)
    {
        size_t new_size = map->size * 2;

        if (sizeof(*entries) * new_size <= sizeof(*entries) * map->size
849
                || !(entries = heap_realloc(entries, sizeof(*entries) * new_size)))
850 851 852 853 854 855 856 857 858 859 860 861 862 863
        {
            ERR("Failed to resize sampler map entries.\n");
            return;
        }
        map->size = new_size;
        map->entries = entries;
    }

    entry = &entries[map->count++];
    entry->resource_idx = resource_idx;
    entry->sampler_idx = sampler_idx;
    entry->bind_idx = bind_idx;
}

864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883
/* Return the extra register count for parameter "param" of instruction
 * "instr". The matrix instructions yield 3 (M4x4, M3x4), 2 (M4x3, M3x3) or
 * 1 (M3x2) for parameter 1; every other instruction or parameter yields 0. */
static unsigned int get_instr_extra_regcount(enum WINED3D_SHADER_INSTRUCTION_HANDLER instr, unsigned int param)
{
    unsigned int extra;

    switch (instr)
    {
        case WINED3DSIH_M4x4:
        case WINED3DSIH_M3x4:
            extra = 3;
            break;

        case WINED3DSIH_M4x3:
        case WINED3DSIH_M3x3:
            extra = 2;
            break;

        case WINED3DSIH_M3x2:
            extra = 1;
            break;

        default:
            return 0;
    }

    return param == 1 ? extra : 0;
}

884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903
/* Record the size and stride of TGSM register "register_idx" in "reg_maps",
 * growing the tgsm array as needed.
 *
 * An out-of-range register index or a non-compute shader is logged and
 * ignored (S_OK is still returned). Returns E_OUTOFMEMORY if the array
 * cannot be grown. */
static HRESULT shader_reg_maps_add_tgsm(struct wined3d_shader_reg_maps *reg_maps,
        unsigned int register_idx, unsigned int size, unsigned int stride)
{
    struct wined3d_shader_tgsm *entry;
    unsigned int required_count;

    if (register_idx >= MAX_TGSM_REGISTERS)
    {
        ERR("Invalid TGSM register index %u.\n", register_idx);
        return S_OK;
    }
    if (reg_maps->shader_version.type != WINED3D_SHADER_TYPE_COMPUTE)
    {
        FIXME("TGSM declarations are allowed only in compute shaders.\n");
        return S_OK;
    }

    required_count = register_idx + 1;
    if (!wined3d_array_reserve((void **)&reg_maps->tgsm, &reg_maps->tgsm_capacity,
            required_count, sizeof(*reg_maps->tgsm)))
        return E_OUTOFMEMORY;

    /* The count only ever grows; registers may be declared in any order. */
    if (required_count > reg_maps->tgsm_count)
        reg_maps->tgsm_count = required_count;

    entry = &reg_maps->tgsm[register_idx];
    entry->size = size;
    entry->stride = stride;
    return S_OK;
}

911 912 913 914 915 916 917 918 919 920 921 922 923 924
static HRESULT shader_record_shader_phase(struct wined3d_shader *shader,
        struct wined3d_shader_phase **current_phase, const struct wined3d_shader_instruction *ins,
        const DWORD *current_instruction_ptr, const DWORD *previous_instruction_ptr)
{
    struct wined3d_shader_phase *phase;

    if ((phase = *current_phase))
    {
        phase->end = previous_instruction_ptr;
        *current_phase = NULL;
    }

    if (shader->reg_maps.shader_version.type != WINED3D_SHADER_TYPE_HULL)
    {
925
        ERR("Unexpected shader type %s.\n", debug_shader_type(shader->reg_maps.shader_version.type));
926 927 928 929 930 931 932 933 934
        return E_FAIL;
    }

    switch (ins->handler_idx)
    {
        case WINED3DSIH_HS_CONTROL_POINT_PHASE:
            if (shader->u.hs.phases.control_point)
            {
                FIXME("Multiple control point phases.\n");
935
                heap_free(shader->u.hs.phases.control_point);
936
            }
937
            if (!(shader->u.hs.phases.control_point = heap_alloc_zero(sizeof(*shader->u.hs.phases.control_point))))
938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965
                return E_OUTOFMEMORY;
            phase = shader->u.hs.phases.control_point;
            break;
        case WINED3DSIH_HS_FORK_PHASE:
            if (!wined3d_array_reserve((void **)&shader->u.hs.phases.fork,
                    &shader->u.hs.phases.fork_size, shader->u.hs.phases.fork_count + 1,
                    sizeof(*shader->u.hs.phases.fork)))
                return E_OUTOFMEMORY;
            phase = &shader->u.hs.phases.fork[shader->u.hs.phases.fork_count++];
            break;
        case WINED3DSIH_HS_JOIN_PHASE:
            if (!wined3d_array_reserve((void **)&shader->u.hs.phases.join,
                    &shader->u.hs.phases.join_size, shader->u.hs.phases.join_count + 1,
                    sizeof(*shader->u.hs.phases.join)))
                return E_OUTOFMEMORY;
            phase = &shader->u.hs.phases.join[shader->u.hs.phases.join_count++];
            break;
        default:
            ERR("Unexpected opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
            return E_FAIL;
    }

    phase->start = current_instruction_ptr;
    *current_phase = phase;

    return WINED3D_OK;
}

966
static HRESULT shader_calculate_clip_or_cull_distance_mask(
967
        const struct wined3d_shader_signature_element *e, unsigned int *mask)
968
{
969
    /* Clip and cull distances are packed in 4 component registers. 0 and 1 are
970 971
     * the only allowed semantic indices.
     */
972
    if (e->semantic_idx >= WINED3D_MAX_CLIP_DISTANCES / 4)
973
    {
974
        *mask = 0;
975 976 977 978
        WARN("Invalid clip/cull distance index %u.\n", e->semantic_idx);
        return WINED3DERR_INVALIDCALL;
    }

979
    *mask = (e->mask & WINED3DSP_WRITEMASK_ALL) << (4 * e->semantic_idx);
980 981 982
    return WINED3D_OK;
}

983 984 985 986 987 988 989 990 991 992
/* Store interpolation mode "mode" for register "register_idx" in the packed
 * array, using WINED3D_PACKED_INTERPOLATION_BIT_COUNT bits per register.
 * Modes above WINED3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE are reported with a
 * FIXME but still stored. */
static void wined3d_insert_interpolation_mode(DWORD *packed_interpolation_mode,
        unsigned int register_idx, enum wined3d_shader_interpolation_mode mode)
{
    const unsigned int bit_offset = register_idx * WINED3D_PACKED_INTERPOLATION_BIT_COUNT;

    if (mode > WINED3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE)
        FIXME("Unexpected interpolation mode %#x.\n", mode);

    wined3d_insert_bits(packed_interpolation_mode, bit_offset,
            WINED3D_PACKED_INTERPOLATION_BIT_COUNT, mode);
}

993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026
/* Walk the shader's output signature and update shader->reg_maps: mark every
 * output register as used, accumulate the clip and cull distance masks and
 * note whether a viewport array index is written.
 *
 * Returns WINED3D_OK, or the error from
 * shader_calculate_clip_or_cull_distance_mask() for the first element it
 * rejects. */
static HRESULT shader_scan_output_signature(struct wined3d_shader *shader)
{
    struct wined3d_shader_reg_maps *maps = &shader->reg_maps;
    const struct wined3d_shader_signature *signature = &shader->output_signature;
    const struct wined3d_shader_signature_element *element;
    unsigned int idx, distance_mask;
    HRESULT hr;

    for (idx = 0; idx < signature->element_count; ++idx)
    {
        element = &signature->elements[idx];

        maps->output_registers |= 1u << element->register_idx;

        switch (element->sysval_semantic)
        {
            case WINED3D_SV_CLIP_DISTANCE:
                if (FAILED(hr = shader_calculate_clip_or_cull_distance_mask(element, &distance_mask)))
                    return hr;
                maps->clip_distance_mask |= distance_mask;
                break;

            case WINED3D_SV_CULL_DISTANCE:
                if (FAILED(hr = shader_calculate_clip_or_cull_distance_mask(element, &distance_mask)))
                    return hr;
                maps->cull_distance_mask |= distance_mask;
                break;

            case WINED3D_SV_VIEWPORT_ARRAY_INDEX:
                maps->viewport_array = 1;
                break;

            default:
                break;
        }
    }

    return WINED3D_OK;
}

1027
/* Note that this does not count the loop register as an address register. */
1028
static HRESULT shader_get_registers_used(struct wined3d_shader *shader, DWORD constf_size)
1029
{
1030
    struct wined3d_shader_signature_element input_signature_elements[max(MAX_ATTRIBS, MAX_REG_INPUT)];
1031
    struct wined3d_shader_signature_element output_signature_elements[MAX_REG_OUTPUT];
1032 1033 1034 1035
    struct wined3d_shader_signature *output_signature = &shader->output_signature;
    struct wined3d_shader_signature *input_signature = &shader->input_signature;
    struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
    const struct wined3d_shader_frontend *fe = shader->frontend;
1036 1037
    unsigned int cur_loop_depth = 0, max_loop_depth = 0;
    struct wined3d_shader_version shader_version;
1038 1039 1040
    struct wined3d_shader_phase *phase = NULL;
    const DWORD *ptr, *prev_ins, *current_ins;
    void *fe_data = shader->frontend_data;
1041
    unsigned int i;
1042
    HRESULT hr;
1043 1044

    memset(reg_maps, 0, sizeof(*reg_maps));
1045
    memset(input_signature_elements, 0, sizeof(input_signature_elements));
1046
    memset(output_signature_elements, 0, sizeof(output_signature_elements));
1047
    reg_maps->min_rel_offset = ~0U;
1048
    list_init(&reg_maps->indexable_temps);
1049 1050

    fe->shader_read_header(fe_data, &ptr, &shader_version);
1051
    prev_ins = current_ins = ptr;
1052 1053
    reg_maps->shader_version = shader_version;

1054
    shader_set_limits(shader);
1055

1056
    if (!(reg_maps->constf = heap_calloc(((min(shader->limits->constant_float, constf_size) + 31) / 32),
1057
            sizeof(*reg_maps->constf))))
1058 1059 1060 1061 1062 1063 1064 1065 1066
    {
        ERR("Failed to allocate constant map memory.\n");
        return E_OUTOFMEMORY;
    }

    while (!fe->shader_is_end(fe_data, &ptr))
    {
        struct wined3d_shader_instruction ins;

1067
        current_ins = ptr;
1068
        /* Fetch opcode. */
1069
        fe->shader_read_instruction(fe_data, &ptr, &ins);
1070 1071 1072 1073

        /* Unhandled opcode, and its parameters. */
        if (ins.handler_idx == WINED3DSIH_TABLE_SIZE)
        {
1074 1075
            WARN("Encountered unrecognised or invalid instruction.\n");
            return WINED3DERR_INVALIDCALL;
1076 1077 1078
        }

        /* Handle declarations. */
1079 1080
        if (ins.handler_idx == WINED3DSIH_DCL
                || ins.handler_idx == WINED3DSIH_DCL_UAV_TYPED)
1081
        {
1082
            struct wined3d_shader_semantic *semantic = &ins.declaration.semantic;
1083
            unsigned int reg_idx = semantic->reg.reg.idx[0].offset;
1084 1085

            switch (semantic->reg.reg.type)
1086 1087 1088
            {
                /* Mark input registers used. */
                case WINED3DSPR_INPUT:
1089 1090 1091 1092 1093
                    if (reg_idx >= MAX_REG_INPUT)
                    {
                        ERR("Invalid input register index %u.\n", reg_idx);
                        break;
                    }
1094 1095 1096
                    if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL && shader_version.major == 3
                            && semantic->usage == WINED3D_DECL_USAGE_POSITION && !semantic->usage_idx)
                        return WINED3DERR_INVALIDCALL;
1097
                    reg_maps->input_registers |= 1u << reg_idx;
1098
                    shader_signature_from_semantic(&input_signature_elements[reg_idx], semantic);
1099 1100 1101 1102
                    break;

                /* Vertex shader: mark 3.0 output registers used, save token. */
                case WINED3DSPR_OUTPUT:
1103 1104 1105 1106 1107
                    if (reg_idx >= MAX_REG_OUTPUT)
                    {
                        ERR("Invalid output register index %u.\n", reg_idx);
                        break;
                    }
1108
                    reg_maps->output_registers |= 1u << reg_idx;
1109
                    shader_signature_from_semantic(&output_signature_elements[reg_idx], semantic);
1110
                    if (semantic->usage == WINED3D_DECL_USAGE_FOG)
1111
                        reg_maps->fog = 1;
1112 1113
                    if (semantic->usage == WINED3D_DECL_USAGE_PSIZE)
                        reg_maps->point_size = 1;
1114 1115 1116
                    break;

                case WINED3DSPR_SAMPLER:
1117
                    shader_record_sample(reg_maps, reg_idx, reg_idx, reg_idx);
1118
                case WINED3DSPR_RESOURCE:
1119
                    if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1120
                    {
1121
                        ERR("Invalid resource index %u.\n", reg_idx);
1122 1123
                        break;
                    }
1124 1125
                    reg_maps->resource_info[reg_idx].type = semantic->resource_type;
                    reg_maps->resource_info[reg_idx].data_type = semantic->resource_data_type;
1126 1127
                    break;

1128 1129 1130 1131 1132 1133 1134 1135
                case WINED3DSPR_UAV:
                    if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
                    {
                        ERR("Invalid UAV resource index %u.\n", reg_idx);
                        break;
                    }
                    reg_maps->uav_resource_info[reg_idx].type = semantic->resource_type;
                    reg_maps->uav_resource_info[reg_idx].data_type = semantic->resource_data_type;
1136 1137
                    if (ins.flags)
                        FIXME("Ignoring typed UAV flags %#x.\n", ins.flags);
1138 1139
                    break;

1140
                default:
1141
                    TRACE("Not recording DCL register type %#x.\n", semantic->reg.reg.type);
1142 1143 1144
                    break;
            }
        }
1145 1146 1147
        else if (ins.handler_idx == WINED3DSIH_DCL_CONSTANT_BUFFER)
        {
            struct wined3d_shader_register *reg = &ins.declaration.src.reg;
1148 1149
            if (reg->idx[0].offset >= WINED3D_MAX_CBS)
                ERR("Invalid CB index %u.\n", reg->idx[0].offset);
1150
            else
1151
                reg_maps->cb_sizes[reg->idx[0].offset] = reg->idx[1].offset;
1152
        }
1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167
        else if (ins.handler_idx == WINED3DSIH_DCL_GLOBAL_FLAGS)
        {
            if (ins.flags & WINED3DSGF_FORCE_EARLY_DEPTH_STENCIL)
            {
                if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
                    shader->u.ps.force_early_depth_stencil = TRUE;
                else
                    FIXME("Invalid instruction %#x for shader type %#x.\n",
                            ins.handler_idx, shader_version.type);
            }
            else
            {
                WARN("Ignoring global flags %#x.\n", ins.flags);
            }
        }
1168 1169 1170 1171 1172 1173 1174 1175
        else if (ins.handler_idx == WINED3DSIH_DCL_GS_INSTANCES)
        {
            if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
                shader->u.gs.instance_count = ins.declaration.count;
            else
                FIXME("Invalid instruction %#x for shader type %#x.\n",
                        ins.handler_idx, shader_version.type);
        }
1176 1177
        else if (ins.handler_idx == WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT
                || ins.handler_idx == WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT)
1178 1179 1180 1181 1182 1183 1184
        {
            if (phase)
                phase->instance_count = ins.declaration.count;
            else
                FIXME("Instruction %s outside of shader phase.\n",
                        debug_d3dshaderinstructionhandler(ins.handler_idx));
        }
1185 1186 1187 1188 1189 1190
        else if (ins.handler_idx == WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER)
        {
            if (reg_maps->icb)
                FIXME("Multiple immediate constant buffers.\n");
            reg_maps->icb = ins.declaration.icb;
        }
1191 1192
        else if (ins.handler_idx == WINED3DSIH_DCL_INDEXABLE_TEMP)
        {
1193 1194 1195 1196 1197 1198 1199
            if (phase)
            {
                FIXME("Indexable temporary registers not supported.\n");
            }
            else
            {
                struct wined3d_shader_indexable_temp *reg;
1200

1201
                if (!(reg = heap_alloc(sizeof(*reg))))
1202
                    return E_OUTOFMEMORY;
1203

1204 1205 1206
                *reg = ins.declaration.indexable_temp;
                list_add_tail(&reg_maps->indexable_temps, &reg->entry);
            }
1207
        }
1208 1209 1210
        else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PRIMITIVE)
        {
            if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1211
                shader->u.gs.input_type = ins.declaration.primitive_type.type;
1212 1213 1214 1215
            else
                FIXME("Invalid instruction %#x for shader type %#x.\n",
                        ins.handler_idx, shader_version.type);
        }
1216 1217 1218
        else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PS)
        {
            unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
1219
            if (reg_idx >= MAX_REG_INPUT)
1220 1221 1222 1223 1224
            {
                ERR("Invalid register index %u.\n", reg_idx);
                break;
            }
            if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1225
                wined3d_insert_interpolation_mode(shader->u.ps.interpolation_mode, reg_idx, ins.flags);
1226 1227 1228 1229
            else
                FIXME("Invalid instruction %#x for shader type %#x.\n",
                        ins.handler_idx, shader_version.type);
        }
1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242
        else if (ins.handler_idx == WINED3DSIH_DCL_OUTPUT)
        {
            if (ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUT
                    || ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUTGE
                    || ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUTLE)
            {
                if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
                    shader->u.ps.depth_output = ins.declaration.dst.reg.type;
                else
                    FIXME("Invalid instruction %#x for shader type %#x.\n",
                            ins.handler_idx, shader_version.type);
            }
        }
1243 1244 1245 1246 1247 1248 1249
        else if (ins.handler_idx == WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT)
        {
            if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
                shader->u.hs.output_vertex_count = ins.declaration.count;
            else
                FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
        }
1250 1251 1252
        else if (ins.handler_idx == WINED3DSIH_DCL_OUTPUT_TOPOLOGY)
        {
            if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1253
                shader->u.gs.output_type = ins.declaration.primitive_type.type;
1254 1255 1256 1257
            else
                FIXME("Invalid instruction %#x for shader type %#x.\n",
                        ins.handler_idx, shader_version.type);
        }
1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269
        else if (ins.handler_idx == WINED3DSIH_DCL_RESOURCE_RAW)
        {
            unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
            if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
            {
                ERR("Invalid resource index %u.\n", reg_idx);
                break;
            }
            reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
            reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
            reg_maps->resource_info[reg_idx].flags = WINED3D_VIEW_BUFFER_RAW;
        }
1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282
        else if (ins.handler_idx == WINED3DSIH_DCL_RESOURCE_STRUCTURED)
        {
            unsigned int reg_idx = ins.declaration.structured_resource.reg.reg.idx[0].offset;
            if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
            {
                ERR("Invalid resource index %u.\n", reg_idx);
                break;
            }
            reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
            reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
            reg_maps->resource_info[reg_idx].flags = 0;
            reg_maps->resource_info[reg_idx].stride = ins.declaration.structured_resource.byte_stride / 4;
        }
1283 1284 1285 1286 1287
        else if (ins.handler_idx == WINED3DSIH_DCL_SAMPLER)
        {
            if (ins.flags & WINED3DSI_SAMPLER_COMPARISON_MODE)
                reg_maps->sampler_comparison_mode |= (1u << ins.declaration.dst.reg.idx[0].offset);
        }
1288 1289
        else if (ins.handler_idx == WINED3DSIH_DCL_TEMPS)
        {
1290 1291 1292 1293
            if (phase)
                phase->temporary_count = ins.declaration.count;
            else
                reg_maps->temporary_count = ins.declaration.count;
1294
        }
1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315
        else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_DOMAIN)
        {
            if (shader_version.type == WINED3D_SHADER_TYPE_DOMAIN)
                shader->u.ds.tessellator_domain = ins.declaration.tessellator_domain;
            else if (shader_version.type != WINED3D_SHADER_TYPE_HULL)
                FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
        }
        else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE)
        {
            if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
                shader->u.hs.tessellator_output_primitive = ins.declaration.tessellator_output_primitive;
            else
                FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
        }
        else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_PARTITIONING)
        {
            if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
                shader->u.hs.tessellator_partitioning = ins.declaration.tessellator_partitioning;
            else
                FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
        }
1316 1317
        else if (ins.handler_idx == WINED3DSIH_DCL_TGSM_RAW)
        {
1318 1319 1320
            if (FAILED(hr = shader_reg_maps_add_tgsm(reg_maps, ins.declaration.tgsm_raw.reg.reg.idx[0].offset,
                    ins.declaration.tgsm_raw.byte_count / 4, 0)))
                return hr;
1321 1322 1323
        }
        else if (ins.handler_idx == WINED3DSIH_DCL_TGSM_STRUCTURED)
        {
1324 1325 1326 1327 1328
            unsigned int stride = ins.declaration.tgsm_structured.byte_stride / 4;
            unsigned int size = stride * ins.declaration.tgsm_structured.structure_count;
            if (FAILED(hr = shader_reg_maps_add_tgsm(reg_maps,
                    ins.declaration.tgsm_structured.reg.reg.idx[0].offset, size, stride)))
                return hr;
1329
        }
1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341
        else if (ins.handler_idx == WINED3DSIH_DCL_THREAD_GROUP)
        {
            if (shader_version.type == WINED3D_SHADER_TYPE_COMPUTE)
            {
                shader->u.cs.thread_group_size = ins.declaration.thread_group_size;
            }
            else
            {
                FIXME("Invalid instruction %#x for shader type %#x.\n",
                        ins.handler_idx, shader_version.type);
            }
        }
1342 1343 1344 1345 1346 1347 1348 1349
        else if (ins.handler_idx == WINED3DSIH_DCL_UAV_RAW)
        {
            unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
            if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
            {
                ERR("Invalid UAV resource index %u.\n", reg_idx);
                break;
            }
1350 1351
            if (ins.flags)
                FIXME("Ignoring raw UAV flags %#x.\n", ins.flags);
1352 1353 1354 1355
            reg_maps->uav_resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
            reg_maps->uav_resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
            reg_maps->uav_resource_info[reg_idx].flags = WINED3D_VIEW_BUFFER_RAW;
        }
1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370
        else if (ins.handler_idx == WINED3DSIH_DCL_UAV_STRUCTURED)
        {
            unsigned int reg_idx = ins.declaration.structured_resource.reg.reg.idx[0].offset;
            if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
            {
                ERR("Invalid UAV resource index %u.\n", reg_idx);
                break;
            }
            if (ins.flags)
                FIXME("Ignoring structured UAV flags %#x.\n", ins.flags);
            reg_maps->uav_resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
            reg_maps->uav_resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
            reg_maps->uav_resource_info[reg_idx].flags = 0;
            reg_maps->uav_resource_info[reg_idx].stride = ins.declaration.structured_resource.byte_stride / 4;
        }
1371 1372 1373 1374 1375 1376 1377 1378
        else if (ins.handler_idx == WINED3DSIH_DCL_VERTICES_OUT)
        {
            if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
                shader->u.gs.vertices_out = ins.declaration.count;
            else
                FIXME("Invalid instruction %#x for shader type %#x.\n",
                        ins.handler_idx, shader_version.type);
        }
1379 1380
        else if (ins.handler_idx == WINED3DSIH_DEF)
        {
1381
            struct wined3d_shader_lconst *lconst;
1382
            float *value;
1383 1384 1385

            if (!(lconst = heap_alloc(sizeof(*lconst))))
                return E_OUTOFMEMORY;
1386

1387
            lconst->idx = ins.dst[0].reg.idx[0].offset;
1388
            memcpy(lconst->value, ins.src[0].reg.u.immconst_data, 4 * sizeof(DWORD));
1389
            value = (float *)lconst->value;
1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403

            /* In pixel shader 1.X shaders, the constants are clamped between [-1;1] */
            if (shader_version.major == 1 && shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
            {
                if (value[0] < -1.0f) value[0] = -1.0f;
                else if (value[0] > 1.0f) value[0] = 1.0f;
                if (value[1] < -1.0f) value[1] = -1.0f;
                else if (value[1] > 1.0f) value[1] = 1.0f;
                if (value[2] < -1.0f) value[2] = -1.0f;
                else if (value[2] > 1.0f) value[2] = 1.0f;
                if (value[3] < -1.0f) value[3] = -1.0f;
                else if (value[3] > 1.0f) value[3] = 1.0f;
            }

1404
            list_add_head(&shader->constantsF, &lconst->entry);
1405 1406 1407 1408 1409 1410

            if (isinf(value[0]) || isnan(value[0]) || isinf(value[1]) || isnan(value[1])
                    || isinf(value[2]) || isnan(value[2]) || isinf(value[3]) || isnan(value[3]))
            {
                shader->lconst_inf_or_nan = TRUE;
            }
1411 1412 1413
        }
        else if (ins.handler_idx == WINED3DSIH_DEFI)
        {
1414 1415 1416 1417
            struct wined3d_shader_lconst *lconst;

            if (!(lconst = heap_alloc(sizeof(*lconst))))
                return E_OUTOFMEMORY;
1418

1419
            lconst->idx = ins.dst[0].reg.idx[0].offset;
1420
            memcpy(lconst->value, ins.src[0].reg.u.immconst_data, 4 * sizeof(DWORD));
1421

1422
            list_add_head(&shader->constantsI, &lconst->entry);
1423
            reg_maps->local_int_consts |= (1u << lconst->idx);
1424 1425 1426
        }
        else if (ins.handler_idx == WINED3DSIH_DEFB)
        {
1427 1428 1429 1430
            struct wined3d_shader_lconst *lconst;

            if (!(lconst = heap_alloc(sizeof(*lconst))))
                return E_OUTOFMEMORY;
1431

1432
            lconst->idx = ins.dst[0].reg.idx[0].offset;
1433
            memcpy(lconst->value, ins.src[0].reg.u.immconst_data, sizeof(DWORD));
1434

1435
            list_add_head(&shader->constantsB, &lconst->entry);
1436
            reg_maps->local_bool_consts |= (1u << lconst->idx);
1437
        }
1438 1439 1440 1441 1442 1443 1444 1445
        /* Handle shader phases. */
        else if (ins.handler_idx == WINED3DSIH_HS_CONTROL_POINT_PHASE
                || ins.handler_idx == WINED3DSIH_HS_FORK_PHASE
                || ins.handler_idx == WINED3DSIH_HS_JOIN_PHASE)
        {
            if (FAILED(hr = shader_record_shader_phase(shader, &phase, &ins, current_ins, prev_ins)))
                return hr;
        }
1446 1447 1448
        /* For subroutine prototypes. */
        else if (ins.handler_idx == WINED3DSIH_LABEL)
        {
1449
            reg_maps->labels |= 1u << ins.src[0].reg.idx[0].offset;
1450 1451 1452 1453 1454
        }
        /* Set texture, address, temporary registers. */
        else
        {
            BOOL color0_mov = FALSE;
1455
            unsigned int i;
1456 1457 1458 1459 1460 1461 1462

            /* This will loop over all the registers and try to
             * make a bitmask of the ones we're interested in.
             *
             * Relative addressing tokens are ignored, but that's
             * okay, since we'll catch any address registers when
             * they are initialized (required by spec). */
1463
            for (i = 0; i < ins.dst_count; ++i)
1464
            {
1465 1466 1467
                if (!shader_record_register_usage(shader, reg_maps, &ins.dst[i].reg,
                        shader_version.type, constf_size))
                    return WINED3DERR_INVALIDCALL;
1468

1469
                if (shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
1470
                {
1471
                    UINT idx = ins.dst[i].reg.idx[0].offset;
1472

1473
                    switch (ins.dst[i].reg.type)
1474 1475
                    {
                        case WINED3DSPR_RASTOUT:
1476 1477
                            if (shader_version.major >= 3)
                                break;
1478 1479 1480
                            switch (idx)
                            {
                                case 0: /* oPos */
1481
                                    reg_maps->output_registers |= 1u << 10;
1482
                                    shader_signature_from_usage(&output_signature_elements[10],
1483
                                            WINED3D_DECL_USAGE_POSITION, 0, 10, WINED3DSP_WRITEMASK_ALL);
1484 1485 1486
                                    break;

                                case 1: /* oFog */
1487
                                    reg_maps->output_registers |= 1u << 11;
1488
                                    shader_signature_from_usage(&output_signature_elements[11],
1489
                                            WINED3D_DECL_USAGE_FOG, 0, 11, WINED3DSP_WRITEMASK_0);
1490 1491 1492
                                    break;

                                case 2: /* oPts */
1493
                                    reg_maps->output_registers |= 1u << 11;
1494
                                    shader_signature_from_usage(&output_signature_elements[11],
1495
                                            WINED3D_DECL_USAGE_PSIZE, 0, 11, WINED3DSP_WRITEMASK_1);
1496 1497 1498 1499 1500
                                    break;
                            }
                            break;

                        case WINED3DSPR_ATTROUT:
1501 1502
                            if (shader_version.major >= 3)
                                break;
1503 1504 1505
                            if (idx < 2)
                            {
                                idx += 8;
1506
                                if (reg_maps->output_registers & (1u << idx))
1507
                                {
1508
                                    output_signature_elements[idx].mask |= ins.dst[i].write_mask;
1509 1510 1511
                                }
                                else
                                {
1512
                                    reg_maps->output_registers |= 1u << idx;
1513
                                    shader_signature_from_usage(&output_signature_elements[idx],
1514
                                            WINED3D_DECL_USAGE_COLOR, idx - 8, idx, ins.dst[i].write_mask);
1515 1516 1517 1518
                                }
                            }
                            break;

1519
                        case WINED3DSPR_TEXCRDOUT: /* WINED3DSPR_OUTPUT */
1520 1521
                            if (shader_version.major >= 3)
                            {
1522 1523 1524 1525 1526
                                if (idx >= ARRAY_SIZE(reg_maps->u.output_registers_mask))
                                {
                                    WARN("Invalid output register index %u.\n", idx);
                                    break;
                                }
1527 1528 1529
                                reg_maps->u.output_registers_mask[idx] |= ins.dst[i].write_mask;
                                break;
                            }
1530 1531 1532 1533 1534
                            if (idx >= ARRAY_SIZE(reg_maps->u.texcoord_mask))
                            {
                                WARN("Invalid texcoord index %u.\n", idx);
                                break;
                            }
1535
                            reg_maps->u.texcoord_mask[idx] |= ins.dst[i].write_mask;
1536
                            if (reg_maps->output_registers & (1u << idx))
1537
                            {
1538
                                output_signature_elements[idx].mask |= ins.dst[i].write_mask;
1539 1540 1541
                            }
                            else
                            {
1542
                                reg_maps->output_registers |= 1u << idx;
1543
                                shader_signature_from_usage(&output_signature_elements[idx],
1544
                                        WINED3D_DECL_USAGE_TEXCOORD, idx, idx, ins.dst[i].write_mask);
1545 1546 1547 1548 1549 1550
                            }
                            break;

                        default:
                            break;
                    }
1551 1552 1553 1554
                }

                if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
                {
1555
                    if (ins.dst[i].reg.type == WINED3DSPR_COLOROUT && !ins.dst[i].reg.idx[0].offset)
1556
                    {
1557 1558 1559 1560 1561 1562 1563 1564
                        /* Many 2.0 and 3.0 pixel shaders end with a MOV from a temp register to
                         * COLOROUT 0. If we know this in advance, the ARB shader backend can skip
                         * the mov and perform the sRGB write correction from the source register.
                         *
                         * However, if the mov is only partial, we can't do this, and if the write
                         * comes from an instruction other than MOV it is hard to do as well. If
                         * COLOROUT 0 is overwritten partially later, the marker is dropped again. */
                        shader->u.ps.color0_mov = FALSE;
1565
                        if (ins.handler_idx == WINED3DSIH_MOV
1566
                                && ins.dst[i].write_mask == WINED3DSP_WRITEMASK_ALL)
1567 1568 1569 1570 1571 1572 1573 1574
                        {
                            /* Used later when the source register is read. */
                            color0_mov = TRUE;
                        }
                    }
                    /* Also drop the MOV marker if the source register is overwritten prior to the shader
                     * end
                     */
1575
                    else if (ins.dst[i].reg.type == WINED3DSPR_TEMP
1576
                            && ins.dst[i].reg.idx[0].offset == shader->u.ps.color0_reg)
1577
                    {
1578
                        shader->u.ps.color0_mov = FALSE;
1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595
                    }
                }

                /* Declare 1.x samplers implicitly, based on the destination reg. number. */
                if (shader_version.major == 1
                        && (ins.handler_idx == WINED3DSIH_TEX
                            || ins.handler_idx == WINED3DSIH_TEXBEM
                            || ins.handler_idx == WINED3DSIH_TEXBEML
                            || ins.handler_idx == WINED3DSIH_TEXDP3TEX
                            || ins.handler_idx == WINED3DSIH_TEXM3x2TEX
                            || ins.handler_idx == WINED3DSIH_TEXM3x3SPEC
                            || ins.handler_idx == WINED3DSIH_TEXM3x3TEX
                            || ins.handler_idx == WINED3DSIH_TEXM3x3VSPEC
                            || ins.handler_idx == WINED3DSIH_TEXREG2AR
                            || ins.handler_idx == WINED3DSIH_TEXREG2GB
                            || ins.handler_idx == WINED3DSIH_TEXREG2RGB))
                {
1596 1597
                    unsigned int reg_idx = ins.dst[i].reg.idx[0].offset;

1598 1599 1600 1601 1602 1603
                    if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
                    {
                        WARN("Invalid 1.x sampler index %u.\n", reg_idx);
                        continue;
                    }

1604
                    TRACE("Setting fake 2D resource for 1.x pixelshader.\n");
1605 1606
                    reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_TEXTURE_2D;
                    reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_FLOAT;
1607
                    shader_record_sample(reg_maps, reg_idx, reg_idx, reg_idx);
1608 1609 1610 1611 1612

                    /* texbem is only valid with < 1.4 pixel shaders */
                    if (ins.handler_idx == WINED3DSIH_TEXBEM
                            || ins.handler_idx == WINED3DSIH_TEXBEML)
                    {
1613
                        reg_maps->bumpmat |= 1u << reg_idx;
1614 1615
                        if (ins.handler_idx == WINED3DSIH_TEXBEML)
                        {
1616
                            reg_maps->luminanceparams |= 1u << reg_idx;
1617 1618 1619 1620 1621
                        }
                    }
                }
                else if (ins.handler_idx == WINED3DSIH_BEM)
                {
1622
                    reg_maps->bumpmat |= 1u << ins.dst[i].reg.idx[0].offset;
1623 1624 1625
                }
            }

1626
            if (ins.handler_idx == WINED3DSIH_IMM_ATOMIC_ALLOC || ins.handler_idx == WINED3DSIH_IMM_ATOMIC_CONSUME)
1627 1628 1629 1630 1631 1632 1633 1634 1635 1636
            {
                unsigned int reg_idx = ins.src[0].reg.idx[0].offset;
                if (reg_idx >= MAX_UNORDERED_ACCESS_VIEWS)
                {
                    ERR("Invalid UAV index %u.\n", reg_idx);
                    break;
                }
                reg_maps->uav_counter_mask |= (1u << reg_idx);
            }
            else if ((WINED3DSIH_ATOMIC_AND <= ins.handler_idx && ins.handler_idx <= WINED3DSIH_ATOMIC_XOR)
1637
                    || (WINED3DSIH_IMM_ATOMIC_AND <= ins.handler_idx && ins.handler_idx <= WINED3DSIH_IMM_ATOMIC_XOR)
1638
                    || (ins.handler_idx == WINED3DSIH_BUFINFO && ins.src[0].reg.type == WINED3DSPR_UAV)
1639 1640 1641
                    || ins.handler_idx == WINED3DSIH_LD_UAV_TYPED
                    || (ins.handler_idx == WINED3DSIH_LD_RAW && ins.src[1].reg.type == WINED3DSPR_UAV)
                    || (ins.handler_idx == WINED3DSIH_LD_STRUCTURED && ins.src[2].reg.type == WINED3DSPR_UAV))
1642
            {
1643
                unsigned int reg_idx;
1644
                if (ins.handler_idx == WINED3DSIH_LD_UAV_TYPED || ins.handler_idx == WINED3DSIH_LD_RAW)
1645
                    reg_idx = ins.src[1].reg.idx[0].offset;
1646 1647
                else if (ins.handler_idx == WINED3DSIH_LD_STRUCTURED)
                    reg_idx = ins.src[2].reg.idx[0].offset;
1648
                else if (WINED3DSIH_ATOMIC_AND <= ins.handler_idx && ins.handler_idx <= WINED3DSIH_ATOMIC_XOR)
1649
                    reg_idx = ins.dst[0].reg.idx[0].offset;
1650 1651
                else if (ins.handler_idx == WINED3DSIH_BUFINFO)
                    reg_idx = ins.src[0].reg.idx[0].offset;
1652 1653
                else
                    reg_idx = ins.dst[1].reg.idx[0].offset;
1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664
                if (reg_idx >= MAX_UNORDERED_ACCESS_VIEWS)
                {
                    ERR("Invalid UAV index %u.\n", reg_idx);
                    break;
                }
                reg_maps->uav_read_mask |= (1u << reg_idx);
            }
            else if (ins.handler_idx == WINED3DSIH_NRM)
            {
                reg_maps->usesnrm = 1;
            }
1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676
            else if (ins.handler_idx == WINED3DSIH_DSY
                    || ins.handler_idx == WINED3DSIH_DSY_COARSE
                    || ins.handler_idx == WINED3DSIH_DSY_FINE)
            {
                reg_maps->usesdsy = 1;
            }
            else if (ins.handler_idx == WINED3DSIH_DSX
                    || ins.handler_idx == WINED3DSIH_DSX_COARSE
                    || ins.handler_idx == WINED3DSIH_DSX_FINE)
            {
                reg_maps->usesdsx = 1;
            }
1677 1678 1679 1680 1681
            else if (ins.handler_idx == WINED3DSIH_TEXLDD) reg_maps->usestexldd = 1;
            else if (ins.handler_idx == WINED3DSIH_TEXLDL) reg_maps->usestexldl = 1;
            else if (ins.handler_idx == WINED3DSIH_MOVA) reg_maps->usesmova = 1;
            else if (ins.handler_idx == WINED3DSIH_IFC) reg_maps->usesifc = 1;
            else if (ins.handler_idx == WINED3DSIH_CALL) reg_maps->usescall = 1;
1682
            else if (ins.handler_idx == WINED3DSIH_POW) reg_maps->usespow = 1;
1683 1684 1685 1686 1687 1688 1689 1690 1691
            else if (ins.handler_idx == WINED3DSIH_LOOP
                    || ins.handler_idx == WINED3DSIH_REP)
            {
                ++cur_loop_depth;
                if (cur_loop_depth > max_loop_depth)
                    max_loop_depth = cur_loop_depth;
            }
            else if (ins.handler_idx == WINED3DSIH_ENDLOOP
                    || ins.handler_idx == WINED3DSIH_ENDREP)
1692
            {
1693
                --cur_loop_depth;
1694
            }
1695
            else if (ins.handler_idx == WINED3DSIH_GATHER4
1696
                    || ins.handler_idx == WINED3DSIH_GATHER4_C
1697
                    || ins.handler_idx == WINED3DSIH_SAMPLE
1698
                    || ins.handler_idx == WINED3DSIH_SAMPLE_B
1699
                    || ins.handler_idx == WINED3DSIH_SAMPLE_C
1700
                    || ins.handler_idx == WINED3DSIH_SAMPLE_C_LZ
1701 1702 1703 1704 1705 1706
                    || ins.handler_idx == WINED3DSIH_SAMPLE_GRAD
                    || ins.handler_idx == WINED3DSIH_SAMPLE_LOD)
            {
                shader_record_sample(reg_maps, ins.src[1].reg.idx[0].offset,
                        ins.src[2].reg.idx[0].offset, reg_maps->sampler_map.count);
            }
1707 1708
            else if (ins.handler_idx == WINED3DSIH_GATHER4_PO
                    || ins.handler_idx == WINED3DSIH_GATHER4_PO_C)
1709 1710 1711 1712
            {
                shader_record_sample(reg_maps, ins.src[2].reg.idx[0].offset,
                        ins.src[3].reg.idx[0].offset, reg_maps->sampler_map.count);
            }
1713
            else if ((ins.handler_idx == WINED3DSIH_BUFINFO && ins.src[0].reg.type == WINED3DSPR_RESOURCE)
1714
                    || (ins.handler_idx == WINED3DSIH_SAMPLE_INFO && ins.src[0].reg.type == WINED3DSPR_RESOURCE))
1715 1716 1717 1718
            {
                shader_record_sample(reg_maps, ins.src[0].reg.idx[0].offset,
                        WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
            }
1719
            else if (ins.handler_idx == WINED3DSIH_LD
1720
                    || ins.handler_idx == WINED3DSIH_LD2DMS
1721
                    || (ins.handler_idx == WINED3DSIH_LD_RAW && ins.src[1].reg.type == WINED3DSPR_RESOURCE)
1722
                    || (ins.handler_idx == WINED3DSIH_RESINFO && ins.src[1].reg.type == WINED3DSPR_RESOURCE))
1723 1724 1725 1726
            {
                shader_record_sample(reg_maps, ins.src[1].reg.idx[0].offset,
                        WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
            }
1727 1728 1729 1730 1731 1732
            else if (ins.handler_idx == WINED3DSIH_LD_STRUCTURED
                    && ins.src[2].reg.type == WINED3DSPR_RESOURCE)
            {
                shader_record_sample(reg_maps, ins.src[2].reg.idx[0].offset,
                        WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
            }
1733

1734
            if (ins.predicate)
1735 1736 1737
                if (!shader_record_register_usage(shader, reg_maps, &ins.predicate->reg,
                        shader_version.type, constf_size))
                    return WINED3DERR_INVALIDCALL;
1738

1739 1740 1741 1742
            for (i = 0; i < ins.src_count; ++i)
            {
                unsigned int count = get_instr_extra_regcount(ins.handler_idx, i);
                struct wined3d_shader_register reg = ins.src[i].reg;
1743

1744 1745 1746
                if (!shader_record_register_usage(shader, reg_maps, &ins.src[i].reg,
                        shader_version.type, constf_size))
                    return WINED3DERR_INVALIDCALL;
1747 1748
                while (count)
                {
1749
                    ++reg.idx[0].offset;
1750 1751 1752
                    if (!shader_record_register_usage(shader, reg_maps, &reg,
                            shader_version.type, constf_size))
                        return WINED3DERR_INVALIDCALL;
1753 1754 1755 1756 1757
                    --count;
                }

                if (color0_mov)
                {
1758 1759
                    if (ins.src[i].reg.type == WINED3DSPR_TEMP
                            && ins.src[i].swizzle == WINED3DSP_NOSWIZZLE)
1760
                    {
1761
                        shader->u.ps.color0_mov = TRUE;
1762
                        shader->u.ps.color0_reg = ins.src[i].reg.idx[0].offset;
1763 1764 1765 1766
                    }
                }
            }
        }
1767 1768

        prev_ins = current_ins;
1769 1770 1771
    }
    reg_maps->loop_depth = max_loop_depth;

1772 1773 1774 1775 1776 1777
    if (phase)
    {
        phase->end = prev_ins;
        phase = NULL;
    }

1778 1779 1780
    /* PS before 2.0 don't have explicit color outputs. Instead the value of
     * R0 is written to the render target. */
    if (shader_version.major < 2 && shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1781
        reg_maps->rt_mask |= (1u << 0);
1782

1783 1784 1785 1786
    if (input_signature->elements)
    {
        for (i = 0; i < input_signature->element_count; ++i)
        {
1787 1788 1789 1790 1791 1792 1793 1794 1795
            if (shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
            {
                if (input_signature->elements[i].register_idx >= ARRAY_SIZE(shader->u.vs.attributes))
                {
                    WARN("Invalid input signature register index %u.\n", input_signature->elements[i].register_idx);
                    return WINED3DERR_INVALIDCALL;
                }
            }
            else if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1796 1797 1798 1799 1800 1801
            {
                if (input_signature->elements[i].sysval_semantic == WINED3D_SV_POSITION)
                    reg_maps->vpos = 1;
                else if (input_signature->elements[i].sysval_semantic == WINED3D_SV_IS_FRONT_FACE)
                    reg_maps->usesfacing = 1;
            }
1802
            reg_maps->input_registers |= 1u << input_signature->elements[i].register_idx;
1803 1804 1805 1806
        }
    }
    else if (!input_signature->elements && reg_maps->input_registers)
    {
1807
        unsigned int count = wined3d_popcount(reg_maps->input_registers);
1808 1809 1810
        struct wined3d_shader_signature_element *e;
        unsigned int i;

1811
        if (!(input_signature->elements = heap_calloc(count, sizeof(*input_signature->elements))))
1812 1813 1814 1815 1816 1817
            return E_OUTOFMEMORY;
        input_signature->element_count = count;

        e = input_signature->elements;
        for (i = 0; i < ARRAY_SIZE(input_signature_elements); ++i)
        {
1818
            if (!(reg_maps->input_registers & (1u << i)))
1819 1820 1821 1822 1823 1824
                continue;
            input_signature_elements[i].register_idx = i;
            *e++ = input_signature_elements[i];
        }
    }

1825 1826
    if (output_signature->elements)
    {
1827 1828
        if (FAILED(hr = shader_scan_output_signature(shader)))
            return hr;
1829 1830 1831
    }
    else if (reg_maps->output_registers)
    {
1832
        unsigned int count = wined3d_popcount(reg_maps->output_registers);
1833 1834
        struct wined3d_shader_signature_element *e;

1835
        if (!(output_signature->elements = heap_calloc(count, sizeof(*output_signature->elements))))
1836 1837 1838 1839 1840 1841
            return E_OUTOFMEMORY;
        output_signature->element_count = count;

        e = output_signature->elements;
        for (i = 0; i < ARRAY_SIZE(output_signature_elements); ++i)
        {
1842
            if (!(reg_maps->output_registers & (1u << i)))
1843 1844 1845 1846 1847
                continue;
            *e++ = output_signature_elements[i];
        }
    }

1848 1849 1850
    return WINED3D_OK;
}

1851 1852 1853 1854
static void shader_cleanup_reg_maps(struct wined3d_shader_reg_maps *reg_maps)
{
    struct wined3d_shader_indexable_temp *reg, *reg_next;

1855 1856
    heap_free(reg_maps->constf);
    heap_free(reg_maps->sampler_map.entries);
1857 1858

    LIST_FOR_EACH_ENTRY_SAFE(reg, reg_next, &reg_maps->indexable_temps, struct wined3d_shader_indexable_temp, entry)
1859
        heap_free(reg);
1860
    list_init(&reg_maps->indexable_temps);
1861

1862
    heap_free(reg_maps->tgsm);
1863 1864
}

1865
unsigned int shader_find_free_input_register(const struct wined3d_shader_reg_maps *reg_maps, unsigned int max)
1866
{
1867
    DWORD map = 1u << max;
1868 1869 1870 1871 1872 1873
    map |= map - 1;
    map &= reg_maps->shader_version.major < 3 ? ~reg_maps->texcoord : ~reg_maps->input_registers;

    return wined3d_log2i(map);
}

1874
/* Append a textual description of a global flags mask to "buffer".
 *
 * Each recognised flag is written by name, in the fixed order of the table
 * below. Any bits left over once the recognised flags have been removed are
 * written as "unknown_flags(<hex>)". Consecutive items are separated by
 * " | "; this includes the separator between the last recognised flag and
 * the unknown_flags() item, so the output stays readable when unrecognised
 * bits are present. Nothing is written for an empty mask. */
static void shader_dump_global_flags(struct wined3d_string_buffer *buffer, DWORD global_flags)
{
    static const struct
    {
        DWORD flag;
        const char *name;
    }
    known_flags[] =
    {
        {WINED3DSGF_REFACTORING_ALLOWED,               "refactoringAllowed"},
        {WINED3DSGF_FORCE_EARLY_DEPTH_STENCIL,         "forceEarlyDepthStencil"},
        {WINED3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS, "enableRawAndStructuredBuffers"},
    };
    unsigned int i;

    for (i = 0; i < ARRAY_SIZE(known_flags); ++i)
    {
        if (!(global_flags & known_flags[i].flag))
            continue;

        shader_addline(buffer, "%s", known_flags[i].name);
        global_flags &= ~known_flags[i].flag;
        /* Something else is still going to be printed after this flag. */
        if (global_flags)
            shader_addline(buffer, " | ");
    }

    if (global_flags)
        shader_addline(buffer, "unknown_flags(%#x)", global_flags);
}

1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918
/* Append the suffixes describing a sync flags mask to "buffer": "_g" when
 * WINED3DSSF_GROUP_SHARED_MEMORY is set, then "_t" when
 * WINED3DSSF_THREAD_GROUP is set, then "_unknown_flags(<hex>)" carrying
 * whatever other bits are present. */
static void shader_dump_sync_flags(struct wined3d_string_buffer *buffer, DWORD sync_flags)
{
    DWORD unknown = sync_flags;

    unknown &= ~WINED3DSSF_GROUP_SHARED_MEMORY;
    unknown &= ~WINED3DSSF_THREAD_GROUP;

    if (sync_flags & WINED3DSSF_GROUP_SHARED_MEMORY)
        shader_addline(buffer, "_g");

    if (sync_flags & WINED3DSSF_THREAD_GROUP)
        shader_addline(buffer, "_t");

    if (unknown)
        shader_addline(buffer, "_unknown_flags(%#x)", unknown);
}

1919
/* Append a " [precise...]" annotation to "buffer" when any of the
 * WINED3DSI_PRECISE_XYZW bits is set in "flags"; otherwise write nothing.
 *
 * When "flags" is exactly WINED3DSI_PRECISE_XYZW the annotation is the bare
 * " [precise]". For any other value the components whose precise bit is set
 * are listed in parentheses, e.g. " [precise(xz)]". */
static void shader_dump_precise_flags(struct wined3d_string_buffer *buffer, DWORD flags)
{
    if (flags & WINED3DSI_PRECISE_XYZW)
    {
        shader_addline(buffer, " [precise");

        if (flags != WINED3DSI_PRECISE_XYZW)
        {
            const char *x = (flags & WINED3DSI_PRECISE_X) ? "x" : "";
            const char *y = (flags & WINED3DSI_PRECISE_Y) ? "y" : "";
            const char *z = (flags & WINED3DSI_PRECISE_Z) ? "z" : "";
            const char *w = (flags & WINED3DSI_PRECISE_W) ? "w" : "";

            shader_addline(buffer, "(%s%s%s%s)", x, y, z, w);
        }

        shader_addline(buffer, "]");
    }
}

1936 1937 1938 1939 1940 1941 1942
/* Append the suffixes describing a UAV flags mask to "buffer": "_glc" when
 * WINED3DSUF_GLOBALLY_COHERENT is set, then "_opc" when
 * WINED3DSUF_ORDER_PRESERVING_COUNTER is set, then "_unknown_flags(<hex>)"
 * carrying whatever other bits are present. */
static void shader_dump_uav_flags(struct wined3d_string_buffer *buffer, DWORD uav_flags)
{
    DWORD unknown = uav_flags;

    unknown &= ~WINED3DSUF_GLOBALLY_COHERENT;
    unknown &= ~WINED3DSUF_ORDER_PRESERVING_COUNTER;

    if (uav_flags & WINED3DSUF_GLOBALLY_COHERENT)
        shader_addline(buffer, "_glc");

    if (uav_flags & WINED3DSUF_ORDER_PRESERVING_COUNTER)
        shader_addline(buffer, "_opc");

    if (unknown)
        shader_addline(buffer, "_unknown_flags(%#x)", unknown);
}

1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972
/* Write the textual name of a tessellator domain to "buffer". Values without
 * a known name are written as "unknown_tessellator_domain(<hex>)". */
static void shader_dump_tessellator_domain(struct wined3d_string_buffer *buffer,
        enum wined3d_tessellator_domain domain)
{
    const char *name;

    switch (domain)
    {
        case WINED3D_TESSELLATOR_DOMAIN_LINE:     name = "line"; break;
        case WINED3D_TESSELLATOR_DOMAIN_TRIANGLE: name = "triangle"; break;
        case WINED3D_TESSELLATOR_DOMAIN_QUAD:     name = "quad"; break;
        default:
            shader_addline(buffer, "unknown_tessellator_domain(%#x)", domain);
            return;
    }

    shader_addline(buffer, "%s", name);
}

1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995
/* Write the textual name of a tessellator output primitive to "buffer".
 * Values without a known name are written as
 * "unknown_tessellator_output_primitive(<hex>)". */
static void shader_dump_tessellator_output_primitive(struct wined3d_string_buffer *buffer,
        enum wined3d_tessellator_output_primitive output_primitive)
{
    const char *name;

    switch (output_primitive)
    {
        case WINED3D_TESSELLATOR_OUTPUT_POINT:        name = "point"; break;
        case WINED3D_TESSELLATOR_OUTPUT_LINE:         name = "line"; break;
        case WINED3D_TESSELLATOR_OUTPUT_TRIANGLE_CW:  name = "triangle_cw"; break;
        case WINED3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW: name = "triangle_ccw"; break;
        default:
            shader_addline(buffer, "unknown_tessellator_output_primitive(%#x)", output_primitive);
            return;
    }

    shader_addline(buffer, "%s", name);
}

1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
/* Write the textual name of a tessellator partitioning mode to "buffer".
 * Values without a known name are written as
 * "unknown_tessellator_partitioning(<hex>)". */
static void shader_dump_tessellator_partitioning(struct wined3d_string_buffer *buffer,
        enum wined3d_tessellator_partitioning partitioning)
{
    const char *name;

    switch (partitioning)
    {
        case WINED3D_TESSELLATOR_PARTITIONING_INTEGER:         name = "integer"; break;
        case WINED3D_TESSELLATOR_PARTITIONING_POW2:            name = "pow2"; break;
        case WINED3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:  name = "fractional_odd"; break;
        case WINED3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: name = "fractional_even"; break;
        default:
            shader_addline(buffer, "unknown_tessellator_partitioning(%#x)", partitioning);
            return;
    }

    shader_addline(buffer, "%s", name);
}

2019 2020
static void shader_dump_shader_input_sysval_semantic(struct wined3d_string_buffer *buffer,
        enum wined3d_shader_input_sysval_semantic semantic)
2021 2022 2023
{
    unsigned int i;

2024
    for (i = 0; i < ARRAY_SIZE(shader_input_sysval_semantic_names); ++i)
2025
    {
2026
        if (shader_input_sysval_semantic_names[i].sysval_semantic == semantic)
2027
        {
2028
            shader_addline(buffer, "%s", shader_input_sysval_semantic_names[i].sysval_name);
2029 2030 2031 2032
            return;
        }
    }

2033
    shader_addline(buffer, "unknown_shader_input_sysval_semantic(%#x)", semantic);
2034 2035
}

2036
static void shader_dump_decl_usage(struct wined3d_string_buffer *buffer,
2037 2038
        const struct wined3d_shader_semantic *semantic, unsigned int flags,
        const struct wined3d_shader_version *shader_version)
2039
{
2040
    shader_addline(buffer, "dcl");
2041 2042 2043

    if (semantic->reg.reg.type == WINED3DSPR_SAMPLER)
    {
2044
        switch (semantic->resource_type)
2045
        {
2046
            case WINED3D_SHADER_RESOURCE_TEXTURE_2D:
2047
                shader_addline(buffer, "_2d");
2048 2049 2050
                break;

            case WINED3D_SHADER_RESOURCE_TEXTURE_3D:
2051
                shader_addline(buffer, "_3d");
2052 2053 2054
                break;

            case WINED3D_SHADER_RESOURCE_TEXTURE_CUBE:
2055
                shader_addline(buffer, "_cube");
2056 2057 2058
                break;

            default:
2059
                shader_addline(buffer, "_unknown_resource_type(%#x)", semantic->resource_type);
2060 2061 2062
                break;
        }
    }
2063
    else if (semantic->reg.reg.type == WINED3DSPR_RESOURCE || semantic->reg.reg.type == WINED3DSPR_UAV)
2064
    {
2065 2066 2067 2068
        if (semantic->reg.reg.type == WINED3DSPR_RESOURCE)
            shader_addline(buffer, "_resource_");
        else
            shader_addline(buffer, "_uav_");
2069 2070 2071
        switch (semantic->resource_type)
        {
            case WINED3D_SHADER_RESOURCE_BUFFER:
2072
                shader_addline(buffer, "buffer");
2073 2074 2075
                break;

            case WINED3D_SHADER_RESOURCE_TEXTURE_1D:
2076
                shader_addline(buffer, "texture1d");
2077 2078 2079
                break;

            case WINED3D_SHADER_RESOURCE_TEXTURE_2D:
2080
                shader_addline(buffer, "texture2d");
2081 2082 2083
                break;

            case WINED3D_SHADER_RESOURCE_TEXTURE_2DMS:
2084
                shader_addline(buffer, "texture2dms");
2085 2086 2087
                break;

            case WINED3D_SHADER_RESOURCE_TEXTURE_3D:
2088
                shader_addline(buffer, "texture3d");
2089 2090 2091
                break;

            case WINED3D_SHADER_RESOURCE_TEXTURE_CUBE:
2092
                shader_addline(buffer, "texturecube");
2093 2094 2095
                break;

            case WINED3D_SHADER_RESOURCE_TEXTURE_1DARRAY:
2096
                shader_addline(buffer, "texture1darray");
2097 2098 2099
                break;

            case WINED3D_SHADER_RESOURCE_TEXTURE_2DARRAY:
2100
                shader_addline(buffer, "texture2darray");
2101 2102 2103
                break;

            case WINED3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY:
2104
                shader_addline(buffer, "texture2dmsarray");
2105 2106
                break;

2107 2108 2109 2110
            case WINED3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY:
                shader_addline(buffer, "texturecubearray");
                break;

2111
            default:
2112
                shader_addline(buffer, "unknown");
2113
                break;
2114
        }
2115 2116
        if (semantic->reg.reg.type == WINED3DSPR_UAV)
            shader_dump_uav_flags(buffer, flags);
2117 2118 2119
        switch (semantic->resource_data_type)
        {
            case WINED3D_DATA_FLOAT:
2120
                shader_addline(buffer, " (float)");
2121 2122 2123
                break;

            case WINED3D_DATA_INT:
2124
                shader_addline(buffer, " (int)");
2125 2126 2127
                break;

            case WINED3D_DATA_UINT:
2128
                shader_addline(buffer, " (uint)");
2129 2130 2131
                break;

            case WINED3D_DATA_UNORM:
2132
                shader_addline(buffer, " (unorm)");
2133 2134 2135
                break;

            case WINED3D_DATA_SNORM:
2136
                shader_addline(buffer, " (snorm)");
2137 2138 2139
                break;

            default:
2140
                shader_addline(buffer, " (unknown)");
2141 2142
                break;
        }
2143 2144 2145 2146
    }
    else
    {
        /* Pixel shaders 3.0 don't have usage semantics. */
2147 2148 2149 2150
        if (shader_version->major < 3 && shader_version->type == WINED3D_SHADER_TYPE_PIXEL)
            return;
        else
            shader_addline(buffer, "_");
2151 2152 2153

        switch (semantic->usage)
        {
2154
            case WINED3D_DECL_USAGE_POSITION:
2155
                shader_addline(buffer, "position%u", semantic->usage_idx);
2156 2157
                break;

2158
            case WINED3D_DECL_USAGE_BLEND_INDICES:
2159
                shader_addline(buffer, "blend");
2160 2161
                break;

2162
            case WINED3D_DECL_USAGE_BLEND_WEIGHT:
2163
                shader_addline(buffer, "weight");
2164 2165
                break;

2166
            case WINED3D_DECL_USAGE_NORMAL:
2167
                shader_addline(buffer, "normal%u", semantic->usage_idx);
2168 2169
                break;

2170
            case WINED3D_DECL_USAGE_PSIZE:
2171
                shader_addline(buffer, "psize");
2172 2173
                break;

2174
            case WINED3D_DECL_USAGE_COLOR:
2175 2176 2177 2178
                if (!semantic->usage_idx)
                    shader_addline(buffer, "color");
                else
                    shader_addline(buffer, "specular%u", (semantic->usage_idx - 1));
2179 2180
                break;

2181
            case WINED3D_DECL_USAGE_TEXCOORD:
2182
                shader_addline(buffer, "texture%u", semantic->usage_idx);
2183 2184
                break;

2185
            case WINED3D_DECL_USAGE_TANGENT:
2186
                shader_addline(buffer, "tangent");
2187 2188
                break;

2189
            case WINED3D_DECL_USAGE_BINORMAL:
2190
                shader_addline(buffer, "binormal");
2191 2192
                break;

2193
            case WINED3D_DECL_USAGE_TESS_FACTOR:
2194
                shader_addline(buffer, "tessfactor");
2195 2196
                break;

2197
            case WINED3D_DECL_USAGE_POSITIONT:
2198
                shader_addline(buffer, "positionT%u", semantic->usage_idx);
2199 2200
                break;

2201
            case WINED3D_DECL_USAGE_FOG:
2202
                shader_addline(buffer, "fog");
2203 2204
                break;

2205
            case WINED3D_DECL_USAGE_DEPTH:
2206
                shader_addline(buffer, "depth");
2207 2208
                break;

2209
            case WINED3D_DECL_USAGE_SAMPLE:
2210
                shader_addline(buffer, "sample");
2211 2212 2213
                break;

            default:
2214
                shader_addline(buffer, "<unknown_semantic(%#x)>", semantic->usage);
2215
                FIXME("Unrecognised semantic usage %#x.\n", semantic->usage);
2216 2217 2218 2219
        }
    }
}

2220 2221
static void shader_dump_register(struct wined3d_string_buffer *buffer,
        const struct wined3d_shader_register *reg, const struct wined3d_shader_version *shader_version)
2222 2223 2224
{
    static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"};
    static const char * const misctype_reg_names[] = {"vPos", "vFace"};
2225
    UINT offset = reg->idx[0].offset;
2226 2227 2228 2229

    switch (reg->type)
    {
        case WINED3DSPR_TEMP:
2230
            shader_addline(buffer, "r");
2231 2232 2233
            break;

        case WINED3DSPR_INPUT:
2234
            shader_addline(buffer, "v");
2235 2236 2237 2238 2239 2240
            break;

        case WINED3DSPR_CONST:
        case WINED3DSPR_CONST2:
        case WINED3DSPR_CONST3:
        case WINED3DSPR_CONST4:
2241
            shader_addline(buffer, "c");
2242
            offset = shader_get_float_offset(reg->type, offset);
2243 2244 2245
            break;

        case WINED3DSPR_TEXTURE: /* vs: case WINED3DSPR_ADDR */
2246
            shader_addline(buffer, "%c", shader_version->type == WINED3D_SHADER_TYPE_PIXEL ? 't' : 'a');
2247 2248 2249
            break;

        case WINED3DSPR_RASTOUT:
2250
            shader_addline(buffer, "%s", rastout_reg_names[offset]);
2251 2252 2253
            break;

        case WINED3DSPR_COLOROUT:
2254
            shader_addline(buffer, "oC");
2255 2256 2257
            break;

        case WINED3DSPR_DEPTHOUT:
2258
            shader_addline(buffer, "oDepth");
2259 2260
            break;

2261 2262 2263 2264 2265 2266 2267 2268
        case WINED3DSPR_DEPTHOUTGE:
            shader_addline(buffer, "oDepthGE");
            break;

        case WINED3DSPR_DEPTHOUTLE:
            shader_addline(buffer, "oDepthLE");
            break;

2269
        case WINED3DSPR_ATTROUT:
2270
            shader_addline(buffer, "oD");
2271 2272 2273 2274 2275
            break;

        case WINED3DSPR_TEXCRDOUT:
            /* Vertex shaders >= 3.0 use general purpose output registers
             * (WINED3DSPR_OUTPUT), which can include an address token. */
2276 2277 2278 2279
            if (shader_version->major >= 3)
                shader_addline(buffer, "o");
            else
                shader_addline(buffer, "oT");
2280 2281 2282
            break;

        case WINED3DSPR_CONSTINT:
2283
            shader_addline(buffer, "i");
2284 2285 2286
            break;

        case WINED3DSPR_CONSTBOOL:
2287
            shader_addline(buffer, "b");
2288 2289 2290
            break;

        case WINED3DSPR_LABEL:
2291
            shader_addline(buffer, "l");
2292 2293 2294
            break;

        case WINED3DSPR_LOOP:
2295
            shader_addline(buffer, "aL");
2296 2297 2298
            break;

        case WINED3DSPR_SAMPLER:
2299
            shader_addline(buffer, "s");
2300 2301 2302
            break;

        case WINED3DSPR_MISCTYPE:
2303
            if (offset > 1)
2304
            {
2305
                FIXME("Unhandled misctype register %u.\n", offset);
2306 2307
                shader_addline(buffer, "<unhandled misctype %#x>", offset);
            }
2308
            else
2309 2310 2311
            {
                shader_addline(buffer, "%s", misctype_reg_names[offset]);
            }
2312 2313 2314
            break;

        case WINED3DSPR_PREDICATE:
2315
            shader_addline(buffer, "p");
2316 2317 2318
            break;

        case WINED3DSPR_IMMCONST:
2319
            shader_addline(buffer, "l");
2320 2321 2322
            break;

        case WINED3DSPR_CONSTBUFFER:
2323
            shader_addline(buffer, "cb");
2324 2325
            break;

2326
        case WINED3DSPR_IMMCONSTBUFFER:
2327
            shader_addline(buffer, "icb");
2328 2329
            break;

2330
        case WINED3DSPR_PRIMID:
2331
            shader_addline(buffer, "primID");
2332 2333
            break;

2334
        case WINED3DSPR_NULL:
2335
            shader_addline(buffer, "null");
2336 2337
            break;

2338 2339 2340 2341
        case WINED3DSPR_RASTERIZER:
            shader_addline(buffer, "rasterizer");
            break;

2342
        case WINED3DSPR_RESOURCE:
2343
            shader_addline(buffer, "t");
2344 2345
            break;

2346 2347 2348 2349
        case WINED3DSPR_UAV:
            shader_addline(buffer, "u");
            break;

2350 2351 2352 2353
        case WINED3DSPR_OUTPOINTID:
            shader_addline(buffer, "vOutputControlPointID");
            break;

2354 2355 2356 2357
        case WINED3DSPR_FORKINSTID:
            shader_addline(buffer, "vForkInstanceId");
            break;

2358 2359 2360 2361
        case WINED3DSPR_JOININSTID:
            shader_addline(buffer, "vJoinInstanceId");
            break;

2362 2363 2364 2365
        case WINED3DSPR_INCONTROLPOINT:
            shader_addline(buffer, "vicp");
            break;

2366 2367 2368 2369
        case WINED3DSPR_OUTCONTROLPOINT:
            shader_addline(buffer, "vocp");
            break;

2370 2371 2372 2373
        case WINED3DSPR_PATCHCONST:
            shader_addline(buffer, "vpc");
            break;

2374 2375 2376 2377
        case WINED3DSPR_TESSCOORD:
            shader_addline(buffer, "vDomainLocation");
            break;

2378 2379 2380 2381
        case WINED3DSPR_GROUPSHAREDMEM:
            shader_addline(buffer, "g");
            break;

2382 2383 2384 2385
        case WINED3DSPR_THREADID:
            shader_addline(buffer, "vThreadID");
            break;

2386 2387 2388 2389
        case WINED3DSPR_THREADGROUPID:
            shader_addline(buffer, "vThreadGroupID");
            break;

2390 2391 2392 2393
        case WINED3DSPR_LOCALTHREADID:
            shader_addline(buffer, "vThreadIDInGroup");
            break;

2394 2395 2396 2397
        case WINED3DSPR_LOCALTHREADINDEX:
            shader_addline(buffer, "vThreadIDInGroupFlattened");
            break;

2398 2399 2400 2401
        case WINED3DSPR_IDXTEMP:
            shader_addline(buffer, "x");
            break;

2402 2403 2404 2405
        case WINED3DSPR_STREAM:
            shader_addline(buffer, "m");
            break;

2406 2407 2408 2409 2410 2411 2412 2413
        case WINED3DSPR_FUNCTIONBODY:
            shader_addline(buffer, "fb");
            break;

        case WINED3DSPR_FUNCTIONPOINTER:
            shader_addline(buffer, "fp");
            break;

2414 2415 2416 2417
        case WINED3DSPR_COVERAGE:
            shader_addline(buffer, "vCoverage");
            break;

2418 2419 2420 2421
        case WINED3DSPR_SAMPLEMASK:
            shader_addline(buffer, "oMask");
            break;

2422 2423 2424 2425
        case WINED3DSPR_GSINSTID:
            shader_addline(buffer, "vGSInstanceID");
            break;

2426
        default:
2427
            shader_addline(buffer, "<unhandled_rtype(%#x)>", reg->type);
2428 2429 2430 2431 2432
            break;
    }

    if (reg->type == WINED3DSPR_IMMCONST)
    {
2433
        shader_addline(buffer, "(");
2434 2435
        switch (reg->immconst_type)
        {
2436
            case WINED3D_IMMCONST_SCALAR:
2437 2438 2439
                switch (reg->data_type)
                {
                    case WINED3D_DATA_FLOAT:
2440
                        shader_addline(buffer, "%.8e", *(const float *)reg->u.immconst_data);
2441 2442
                        break;
                    case WINED3D_DATA_INT:
2443
                        shader_addline(buffer, "%d", reg->u.immconst_data[0]);
2444 2445 2446 2447
                        break;
                    case WINED3D_DATA_RESOURCE:
                    case WINED3D_DATA_SAMPLER:
                    case WINED3D_DATA_UINT:
2448
                        shader_addline(buffer, "%u", reg->u.immconst_data[0]);
2449 2450
                        break;
                    default:
2451
                        shader_addline(buffer, "<unhandled data type %#x>", reg->data_type);
2452 2453
                        break;
                }
2454 2455
                break;

2456
            case WINED3D_IMMCONST_VEC4:
2457 2458 2459
                switch (reg->data_type)
                {
                    case WINED3D_DATA_FLOAT:
2460
                        shader_addline(buffer, "%.8e, %.8e, %.8e, %.8e",
2461 2462
                                *(const float *)&reg->u.immconst_data[0], *(const float *)&reg->u.immconst_data[1],
                                *(const float *)&reg->u.immconst_data[2], *(const float *)&reg->u.immconst_data[3]);
2463 2464
                        break;
                    case WINED3D_DATA_INT:
2465
                        shader_addline(buffer, "%d, %d, %d, %d",
2466 2467
                                reg->u.immconst_data[0], reg->u.immconst_data[1],
                                reg->u.immconst_data[2], reg->u.immconst_data[3]);
2468 2469 2470 2471
                        break;
                    case WINED3D_DATA_RESOURCE:
                    case WINED3D_DATA_SAMPLER:
                    case WINED3D_DATA_UINT:
2472
                        shader_addline(buffer, "%u, %u, %u, %u",
2473 2474
                                reg->u.immconst_data[0], reg->u.immconst_data[1],
                                reg->u.immconst_data[2], reg->u.immconst_data[3]);
2475 2476
                        break;
                    default:
2477
                        shader_addline(buffer, "<unhandled data type %#x>", reg->data_type);
2478 2479
                        break;
                }
2480 2481 2482
                break;

            default:
2483
                shader_addline(buffer, "<unhandled immconst_type %#x>", reg->immconst_type);
2484 2485
                break;
        }
2486
        shader_addline(buffer, ")");
2487
    }
2488 2489 2490
    else if (reg->type != WINED3DSPR_RASTOUT
            && reg->type != WINED3DSPR_MISCTYPE
            && reg->type != WINED3DSPR_NULL)
2491
    {
2492
        if (offset != ~0u)
2493
        {
2494
            shader_addline(buffer, "[");
2495
            if (reg->idx[0].rel_addr)
2496
            {
2497 2498
                shader_dump_src_param(buffer, reg->idx[0].rel_addr, shader_version);
                shader_addline(buffer, " + ");
2499
            }
2500
            shader_addline(buffer, "%u]", offset);
2501

2502
            if (reg->idx[1].offset != ~0u)
2503
            {
2504
                shader_addline(buffer, "[");
2505
                if (reg->idx[1].rel_addr)
2506
                {
2507 2508
                    shader_dump_src_param(buffer, reg->idx[1].rel_addr, shader_version);
                    shader_addline(buffer, " + ");
2509
                }
2510
                shader_addline(buffer, "%u]", reg->idx[1].offset);
2511 2512
            }
        }
2513 2514 2515

        if (reg->type == WINED3DSPR_FUNCTIONPOINTER)
            shader_addline(buffer, "[%u]", reg->u.fp_body_idx);
2516 2517 2518
    }
}

2519 2520
static void shader_dump_dst_param(struct wined3d_string_buffer *buffer,
        const struct wined3d_shader_dst_param *param, const struct wined3d_shader_version *shader_version)
2521 2522 2523
{
    DWORD write_mask = param->write_mask;

2524
    shader_dump_register(buffer, &param->reg, shader_version);
2525

2526
    if (write_mask && write_mask != WINED3DSP_WRITEMASK_ALL)
2527
    {
2528
        static const char write_mask_chars[] = "xyzw";
2529

2530 2531 2532 2533 2534 2535 2536 2537 2538
        shader_addline(buffer, ".");
        if (write_mask & WINED3DSP_WRITEMASK_0)
            shader_addline(buffer, "%c", write_mask_chars[0]);
        if (write_mask & WINED3DSP_WRITEMASK_1)
            shader_addline(buffer, "%c", write_mask_chars[1]);
        if (write_mask & WINED3DSP_WRITEMASK_2)
            shader_addline(buffer, "%c", write_mask_chars[2]);
        if (write_mask & WINED3DSP_WRITEMASK_3)
            shader_addline(buffer, "%c", write_mask_chars[3]);
2539 2540 2541
    }
}

2542 2543
static void shader_dump_src_param(struct wined3d_string_buffer *buffer,
        const struct wined3d_shader_src_param *param, const struct wined3d_shader_version *shader_version)
2544
{
2545
    enum wined3d_shader_src_modifier src_modifier = param->modifiers;
2546 2547 2548 2549 2550 2551 2552
    DWORD swizzle = param->swizzle;

    if (src_modifier == WINED3DSPSM_NEG
            || src_modifier == WINED3DSPSM_BIASNEG
            || src_modifier == WINED3DSPSM_SIGNNEG
            || src_modifier == WINED3DSPSM_X2NEG
            || src_modifier == WINED3DSPSM_ABSNEG)
2553
        shader_addline(buffer, "-");
2554
    else if (src_modifier == WINED3DSPSM_COMP)
2555
        shader_addline(buffer, "1-");
2556
    else if (src_modifier == WINED3DSPSM_NOT)
2557
        shader_addline(buffer, "!");
2558 2559

    if (src_modifier == WINED3DSPSM_ABS || src_modifier == WINED3DSPSM_ABSNEG)
2560
        shader_addline(buffer, "abs(");
2561

2562
    shader_dump_register(buffer, &param->reg, shader_version);
2563

2564
    switch (src_modifier)
2565
    {
2566 2567 2568
        case WINED3DSPSM_NONE:    break;
        case WINED3DSPSM_NEG:     break;
        case WINED3DSPSM_NOT:     break;
2569 2570 2571 2572
        case WINED3DSPSM_BIAS:    shader_addline(buffer, "_bias"); break;
        case WINED3DSPSM_BIASNEG: shader_addline(buffer, "_bias"); break;
        case WINED3DSPSM_SIGN:    shader_addline(buffer, "_bx2"); break;
        case WINED3DSPSM_SIGNNEG: shader_addline(buffer, "_bx2"); break;
2573
        case WINED3DSPSM_COMP:    break;
2574 2575 2576 2577 2578 2579 2580
        case WINED3DSPSM_X2:      shader_addline(buffer, "_x2"); break;
        case WINED3DSPSM_X2NEG:   shader_addline(buffer, "_x2"); break;
        case WINED3DSPSM_DZ:      shader_addline(buffer, "_dz"); break;
        case WINED3DSPSM_DW:      shader_addline(buffer, "_dw"); break;
        case WINED3DSPSM_ABSNEG:  shader_addline(buffer, ")"); break;
        case WINED3DSPSM_ABS:     shader_addline(buffer, ")"); break;
        default:                  shader_addline(buffer, "_unknown_modifier(%#x)", src_modifier);
2581 2582 2583 2584
    }

    if (swizzle != WINED3DSP_NOSWIZZLE)
    {
2585
        static const char swizzle_chars[] = "xyzw";
2586 2587 2588 2589 2590 2591 2592 2593 2594
        DWORD swizzle_x = swizzle & 0x03;
        DWORD swizzle_y = (swizzle >> 2) & 0x03;
        DWORD swizzle_z = (swizzle >> 4) & 0x03;
        DWORD swizzle_w = (swizzle >> 6) & 0x03;

        if (swizzle_x == swizzle_y
                && swizzle_x == swizzle_z
                && swizzle_x == swizzle_w)
        {
2595
            shader_addline(buffer, ".%c", swizzle_chars[swizzle_x]);
2596 2597 2598
        }
        else
        {
2599
            shader_addline(buffer, ".%c%c%c%c", swizzle_chars[swizzle_x], swizzle_chars[swizzle_y],
2600 2601 2602 2603 2604
                    swizzle_chars[swizzle_z], swizzle_chars[swizzle_w]);
        }
    }
}

2605 2606 2607 2608
/* Shared code in order to generate the bulk of the shader string. */
HRESULT shader_generate_code(const struct wined3d_shader *shader, struct wined3d_string_buffer *buffer,
        const struct wined3d_shader_reg_maps *reg_maps, void *backend_ctx,
        const DWORD *start, const DWORD *end)
2609
{
2610
    struct wined3d_device *device = shader->device;
2611 2612
    const struct wined3d_shader_frontend *fe = shader->frontend;
    void *fe_data = shader->frontend_data;
2613
    struct wined3d_shader_version shader_version;
2614
    struct wined3d_shader_parser_state state;
2615
    struct wined3d_shader_instruction ins;
2616
    struct wined3d_shader_tex_mx tex_mx;
2617
    struct wined3d_shader_context ctx;
2618
    const DWORD *ptr;
2619 2620

    /* Initialize current parsing state. */
2621
    tex_mx.current_row = 0;
2622 2623 2624
    state.current_loop_depth = 0;
    state.current_loop_reg = 0;
    state.in_subroutine = FALSE;
2625

2626
    ctx.shader = shader;
2627
    ctx.gl_info = &device->adapter->gl_info;
2628 2629
    ctx.reg_maps = reg_maps;
    ctx.buffer = buffer;
2630
    ctx.tex_mx = &tex_mx;
2631
    ctx.state = &state;
2632 2633 2634 2635
    ctx.backend_data = backend_ctx;
    ins.ctx = &ctx;

    fe->shader_read_header(fe_data, &ptr, &shader_version);
2636 2637
    if (start)
        ptr = start;
2638

2639
    while (!fe->shader_is_end(fe_data, &ptr) && ptr != end)
2640 2641
    {
        /* Read opcode. */
2642
        fe->shader_read_instruction(fe_data, &ptr, &ins);
2643 2644 2645 2646

        /* Unknown opcode and its parameters. */
        if (ins.handler_idx == WINED3DSIH_TABLE_SIZE)
        {
2647 2648
            WARN("Encountered unrecognised or invalid instruction.\n");
            return WINED3DERR_INVALIDCALL;
2649 2650
        }

2651 2652
        if (ins.predicate)
            FIXME("Predicates not implemented.\n");
2653 2654 2655 2656

        /* Call appropriate function for output target */
        device->shader_backend->shader_handle_instruction(&ins);
    }
2657 2658

    return WINED3D_OK;
2659 2660
}

2661 2662
static void shader_dump_ins_modifiers(struct wined3d_string_buffer *buffer,
        const struct wined3d_shader_dst_param *dst)
2663 2664 2665 2666 2667 2668
{
    DWORD mmask = dst->modifiers;

    switch (dst->shift)
    {
        case 0: break;
2669 2670 2671 2672 2673 2674 2675
        case 13: shader_addline(buffer, "_d8"); break;
        case 14: shader_addline(buffer, "_d4"); break;
        case 15: shader_addline(buffer, "_d2"); break;
        case 1: shader_addline(buffer, "_x2"); break;
        case 2: shader_addline(buffer, "_x4"); break;
        case 3: shader_addline(buffer, "_x8"); break;
        default: shader_addline(buffer, "_unhandled_shift(%d)", dst->shift); break;
2676 2677
    }

2678 2679 2680
    if (mmask & WINED3DSPDM_SATURATE)         shader_addline(buffer, "_sat");
    if (mmask & WINED3DSPDM_PARTIALPRECISION) shader_addline(buffer, "_pp");
    if (mmask & WINED3DSPDM_MSAMPCENTROID)    shader_addline(buffer, "_centroid");
2681 2682

    mmask &= ~(WINED3DSPDM_SATURATE | WINED3DSPDM_PARTIALPRECISION | WINED3DSPDM_MSAMPCENTROID);
2683
    if (mmask) FIXME("Unrecognised modifier %#x.\n", mmask);
2684 2685
}

2686
static void shader_dump_primitive_type(struct wined3d_string_buffer *buffer,
2687
        const struct wined3d_shader_primitive_type *primitive_type)
2688
{
2689
    switch (primitive_type->type)
2690 2691
    {
        case WINED3D_PT_UNDEFINED:
2692
            shader_addline(buffer, "undefined");
2693 2694
            break;
        case WINED3D_PT_POINTLIST:
2695
            shader_addline(buffer, "pointlist");
2696 2697
            break;
        case WINED3D_PT_LINELIST:
2698
            shader_addline(buffer, "linelist");
2699 2700
            break;
        case WINED3D_PT_LINESTRIP:
2701
            shader_addline(buffer, "linestrip");
2702 2703
            break;
        case WINED3D_PT_TRIANGLELIST:
2704
            shader_addline(buffer, "trianglelist");
2705 2706
            break;
        case WINED3D_PT_TRIANGLESTRIP:
2707
            shader_addline(buffer, "trianglestrip");
2708 2709
            break;
        case WINED3D_PT_TRIANGLEFAN:
2710
            shader_addline(buffer, "trianglefan");
2711 2712
            break;
        case WINED3D_PT_LINELIST_ADJ:
2713
            shader_addline(buffer, "linelist_adj");
2714 2715
            break;
        case WINED3D_PT_LINESTRIP_ADJ:
2716
            shader_addline(buffer, "linestrip_adj");
2717 2718
            break;
        case WINED3D_PT_TRIANGLELIST_ADJ:
2719
            shader_addline(buffer, "trianglelist_adj");
2720 2721
            break;
        case WINED3D_PT_TRIANGLESTRIP_ADJ:
2722
            shader_addline(buffer, "trianglestrip_adj");
2723
            break;
2724 2725 2726
        case WINED3D_PT_PATCH:
            shader_addline(buffer, "patch%u", primitive_type->patch_vertex_count);
            break;
2727
        default:
2728
            shader_addline(buffer, "<unrecognized_primitive_type %#x>", primitive_type->type);
2729 2730 2731 2732
            break;
    }
}

2733 2734
static void shader_dump_interpolation_mode(struct wined3d_string_buffer *buffer,
        enum wined3d_shader_interpolation_mode interpolation_mode)
2735 2736 2737 2738
{
    switch (interpolation_mode)
    {
        case WINED3DSIM_CONSTANT:
2739
            shader_addline(buffer, "constant");
2740 2741
            break;
        case WINED3DSIM_LINEAR:
2742
            shader_addline(buffer, "linear");
2743 2744
            break;
        case WINED3DSIM_LINEAR_CENTROID:
2745
            shader_addline(buffer, "linear centroid");
2746 2747
            break;
        case WINED3DSIM_LINEAR_NOPERSPECTIVE:
2748
            shader_addline(buffer, "linear noperspective");
2749 2750
            break;
        case WINED3DSIM_LINEAR_SAMPLE:
2751
            shader_addline(buffer, "linear sample");
2752 2753
            break;
        case WINED3DSIM_LINEAR_NOPERSPECTIVE_CENTROID:
2754
            shader_addline(buffer, "linear noperspective centroid");
2755 2756
            break;
        case WINED3DSIM_LINEAR_NOPERSPECTIVE_SAMPLE:
2757
            shader_addline(buffer, "linear noperspective sample");
2758 2759
            break;
        default:
2760
            shader_addline(buffer, "<unrecognized_interpolation_mode %#x>", interpolation_mode);
2761 2762 2763 2764
            break;
    }
}

2765
static void shader_trace_init(const struct wined3d_shader_frontend *fe, void *fe_data)
2766 2767
{
    struct wined3d_shader_version shader_version;
2768
    struct wined3d_string_buffer buffer;
2769
    const char *type_prefix;
2770
    const char *p, *q;
2771
    const DWORD *ptr;
2772 2773
    DWORD i;

2774 2775 2776 2777 2778 2779
    if (!string_buffer_init(&buffer))
    {
        ERR("Failed to initialize string buffer.\n");
        return;
    }

2780 2781
    fe->shader_read_header(fe_data, &ptr, &shader_version);

2782 2783
    TRACE("Parsing %p.\n", ptr);

2784 2785 2786 2787 2788 2789
    switch (shader_version.type)
    {
        case WINED3D_SHADER_TYPE_VERTEX:
            type_prefix = "vs";
            break;

2790 2791 2792 2793
        case WINED3D_SHADER_TYPE_HULL:
            type_prefix = "hs";
            break;

2794 2795 2796 2797
        case WINED3D_SHADER_TYPE_DOMAIN:
            type_prefix = "ds";
            break;

2798 2799 2800 2801 2802 2803 2804 2805
        case WINED3D_SHADER_TYPE_GEOMETRY:
            type_prefix = "gs";
            break;

        case WINED3D_SHADER_TYPE_PIXEL:
            type_prefix = "ps";
            break;

2806 2807 2808 2809
        case WINED3D_SHADER_TYPE_COMPUTE:
            type_prefix = "cs";
            break;

2810 2811 2812 2813 2814 2815
        default:
            FIXME("Unhandled shader type %#x.\n", shader_version.type);
            type_prefix = "unknown";
            break;
    }

2816
    shader_addline(&buffer, "%s_%u_%u\n", type_prefix, shader_version.major, shader_version.minor);
2817 2818 2819 2820 2821

    while (!fe->shader_is_end(fe_data, &ptr))
    {
        struct wined3d_shader_instruction ins;

2822
        fe->shader_read_instruction(fe_data, &ptr, &ins);
2823 2824
        if (ins.handler_idx == WINED3DSIH_TABLE_SIZE)
        {
2825 2826
            WARN("Skipping unrecognized instruction.\n");
            shader_addline(&buffer, "<unrecognized instruction>\n");
2827 2828 2829
            continue;
        }

2830
        if (ins.handler_idx == WINED3DSIH_DCL || ins.handler_idx == WINED3DSIH_DCL_UAV_TYPED)
2831
        {
2832
            shader_dump_decl_usage(&buffer, &ins.declaration.semantic, ins.flags, &shader_version);
2833 2834 2835
            shader_dump_ins_modifiers(&buffer, &ins.declaration.semantic.reg);
            shader_addline(&buffer, " ");
            shader_dump_dst_param(&buffer, &ins.declaration.semantic.reg, &shader_version);
2836
        }
2837 2838
        else if (ins.handler_idx == WINED3DSIH_DCL_CONSTANT_BUFFER)
        {
2839 2840 2841 2842
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_src_param(&buffer, &ins.declaration.src, &shader_version);
            shader_addline(&buffer, ", %s",
                    ins.flags & WINED3DSI_INDEXED_DYNAMIC ? "dynamicIndexed" : "immediateIndexed");
2843
        }
2844 2845 2846 2847 2848 2849 2850 2851 2852 2853
        else if (ins.handler_idx == WINED3DSIH_DCL_FUNCTION_BODY)
        {
            shader_addline(&buffer, "%s fb%u",
                    shader_opcode_names[ins.handler_idx], ins.declaration.index);
        }
        else if (ins.handler_idx == WINED3DSIH_DCL_FUNCTION_TABLE)
        {
            shader_addline(&buffer, "%s ft%u = {...}",
                    shader_opcode_names[ins.handler_idx], ins.declaration.index);
        }
2854 2855
        else if (ins.handler_idx == WINED3DSIH_DCL_GLOBAL_FLAGS)
        {
2856 2857
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_global_flags(&buffer, ins.flags);
2858
        }
2859 2860 2861 2862 2863
        else if (ins.handler_idx == WINED3DSIH_DCL_HS_MAX_TESSFACTOR)
        {
            shader_addline(&buffer, "%s %.8e", shader_opcode_names[ins.handler_idx],
                    ins.declaration.max_tessellation_factor);
        }
2864 2865
        else if (ins.handler_idx == WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER)
        {
2866
            shader_addline(&buffer, "%s {\n", shader_opcode_names[ins.handler_idx]);
2867
            for (i = 0; i < ins.declaration.icb->vec4_count; ++i)
2868
            {
2869
                shader_addline(&buffer, "    {0x%08x, 0x%08x, 0x%08x, 0x%08x},\n",
2870 2871 2872 2873 2874
                        ins.declaration.icb->data[4 * i + 0],
                        ins.declaration.icb->data[4 * i + 1],
                        ins.declaration.icb->data[4 * i + 2],
                        ins.declaration.icb->data[4 * i + 3]);
            }
2875
            shader_addline(&buffer, "}");
2876
        }
2877 2878 2879 2880 2881 2882
        else if (ins.handler_idx == WINED3DSIH_DCL_INDEX_RANGE)
        {
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_dst_param(&buffer, &ins.declaration.index_range.first_register, &shader_version);
            shader_addline(&buffer, " %u", ins.declaration.index_range.last_register);
        }
2883 2884 2885 2886 2887 2888 2889
        else if (ins.handler_idx == WINED3DSIH_DCL_INDEXABLE_TEMP)
        {
            shader_addline(&buffer, "%s x[%u][%u], %u", shader_opcode_names[ins.handler_idx],
                    ins.declaration.indexable_temp.register_idx,
                    ins.declaration.indexable_temp.register_size,
                    ins.declaration.indexable_temp.component_count);
        }
2890 2891
        else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PS)
        {
2892 2893 2894 2895
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_interpolation_mode(&buffer, ins.flags);
            shader_addline(&buffer, " ");
            shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
2896
        }
2897
        else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PS_SGV
2898
                || ins.handler_idx == WINED3DSIH_DCL_INPUT_SGV
2899
                || ins.handler_idx == WINED3DSIH_DCL_INPUT_SIV
2900
                || ins.handler_idx == WINED3DSIH_DCL_OUTPUT_SIV)
2901
        {
2902 2903 2904
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_dst_param(&buffer, &ins.declaration.register_semantic.reg, &shader_version);
            shader_addline(&buffer, ", ");
2905
            shader_dump_shader_input_sysval_semantic(&buffer, ins.declaration.register_semantic.sysval_semantic);
2906 2907 2908
        }
        else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PS_SIV)
        {
2909 2910 2911 2912 2913
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_interpolation_mode(&buffer, ins.flags);
            shader_addline(&buffer, " ");
            shader_dump_dst_param(&buffer, &ins.declaration.register_semantic.reg, &shader_version);
            shader_addline(&buffer, ", ");
2914
            shader_dump_shader_input_sysval_semantic(&buffer, ins.declaration.register_semantic.sysval_semantic);
2915
        }
2916 2917
        else if (ins.handler_idx == WINED3DSIH_DCL_INPUT
                || ins.handler_idx == WINED3DSIH_DCL_OUTPUT)
2918
        {
2919 2920
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
2921
        }
2922 2923
        else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PRIMITIVE
                || ins.handler_idx == WINED3DSIH_DCL_OUTPUT_TOPOLOGY)
2924
        {
2925
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2926
            shader_dump_primitive_type(&buffer, &ins.declaration.primitive_type);
2927
        }
2928 2929 2930 2931 2932 2933
        else if (ins.handler_idx == WINED3DSIH_DCL_INTERFACE)
        {
            shader_addline(&buffer, "%s fp[%u][%u][%u] = {...}",
                    shader_opcode_names[ins.handler_idx], ins.declaration.fp.index,
                    ins.declaration.fp.array_size, ins.declaration.fp.body_count);
        }
2934 2935 2936 2937 2938
        else if (ins.handler_idx == WINED3DSIH_DCL_RESOURCE_RAW)
        {
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
        }
2939 2940 2941 2942 2943 2944
        else if (ins.handler_idx == WINED3DSIH_DCL_RESOURCE_STRUCTURED)
        {
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_dst_param(&buffer, &ins.declaration.structured_resource.reg, &shader_version);
            shader_addline(&buffer, ", %u", ins.declaration.structured_resource.byte_stride);
        }
2945 2946
        else if (ins.handler_idx == WINED3DSIH_DCL_SAMPLER)
        {
2947 2948
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
2949
            if (ins.flags == WINED3DSI_SAMPLER_COMPARISON_MODE)
2950
                shader_addline(&buffer, ", comparisonMode");
2951
        }
2952
        else if (ins.handler_idx == WINED3DSIH_DCL_TEMPS
2953
                || ins.handler_idx == WINED3DSIH_DCL_GS_INSTANCES
2954
                || ins.handler_idx == WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT
2955
                || ins.handler_idx == WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT
2956
                || ins.handler_idx == WINED3DSIH_DCL_INPUT_CONTROL_POINT_COUNT
2957 2958
                || ins.handler_idx == WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT
                || ins.handler_idx == WINED3DSIH_DCL_VERTICES_OUT)
2959
        {
2960
            shader_addline(&buffer, "%s %u", shader_opcode_names[ins.handler_idx], ins.declaration.count);
2961
        }
2962 2963 2964 2965 2966
        else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_DOMAIN)
        {
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_tessellator_domain(&buffer, ins.declaration.tessellator_domain);
        }
2967 2968 2969 2970 2971
        else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE)
        {
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_tessellator_output_primitive(&buffer, ins.declaration.tessellator_output_primitive);
        }
2972 2973 2974 2975 2976
        else if (ins.handler_idx == WINED3DSIH_DCL_TESSELLATOR_PARTITIONING)
        {
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_tessellator_partitioning(&buffer, ins.declaration.tessellator_partitioning);
        }
2977 2978 2979 2980 2981 2982
        else if (ins.handler_idx == WINED3DSIH_DCL_TGSM_RAW)
        {
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_dst_param(&buffer, &ins.declaration.tgsm_raw.reg, &shader_version);
            shader_addline(&buffer, ", %u", ins.declaration.tgsm_raw.byte_count);
        }
2983 2984 2985 2986 2987 2988 2989
        else if (ins.handler_idx == WINED3DSIH_DCL_TGSM_STRUCTURED)
        {
            shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
            shader_dump_dst_param(&buffer, &ins.declaration.tgsm_structured.reg, &shader_version);
            shader_addline(&buffer, ", %u, %u", ins.declaration.tgsm_structured.byte_stride,
                    ins.declaration.tgsm_structured.structure_count);
        }
2990 2991 2992 2993 2994 2995 2996
        else if (ins.handler_idx == WINED3DSIH_DCL_THREAD_GROUP)
        {
            shader_addline(&buffer, "%s %u, %u, %u", shader_opcode_names[ins.handler_idx],
                    ins.declaration.thread_group_size.x,
                    ins.declaration.thread_group_size.y,
                    ins.declaration.thread_group_size.z);
        }
2997 2998 2999 3000 3001 3002 3003
        else if (ins.handler_idx == WINED3DSIH_DCL_UAV_RAW)
        {
            shader_addline(&buffer, "%s", shader_opcode_names[ins.handler_idx]);
            shader_dump_uav_flags(&buffer, ins.flags);
            shader_addline(&buffer, " ");
            shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
        }
3004 3005 3006 3007 3008 3009 3010 3011
        else if (ins.handler_idx == WINED3DSIH_DCL_UAV_STRUCTURED)
        {
            shader_addline(&buffer, "%s", shader_opcode_names[ins.handler_idx]);
            shader_dump_uav_flags(&buffer, ins.flags);
            shader_addline(&buffer, " ");
            shader_dump_dst_param(&buffer, &ins.declaration.structured_resource.reg, &shader_version);
            shader_addline(&buffer, ", %u", ins.declaration.structured_resource.byte_stride);
        }
3012 3013
        else if (ins.handler_idx == WINED3DSIH_DEF)
        {
3014
            shader_addline(&buffer, "def c%u = %.8e, %.8e, %.8e, %.8e", shader_get_float_offset(ins.dst[0].reg.type,
3015
                    ins.dst[0].reg.idx[0].offset),
3016 3017 3018 3019
                    *(const float *)&ins.src[0].reg.u.immconst_data[0],
                    *(const float *)&ins.src[0].reg.u.immconst_data[1],
                    *(const float *)&ins.src[0].reg.u.immconst_data[2],
                    *(const float *)&ins.src[0].reg.u.immconst_data[3]);
3020 3021 3022
        }
        else if (ins.handler_idx == WINED3DSIH_DEFI)
        {
3023
            shader_addline(&buffer, "defi i%u = %d, %d, %d, %d", ins.dst[0].reg.idx[0].offset,
3024 3025 3026 3027
                    ins.src[0].reg.u.immconst_data[0],
                    ins.src[0].reg.u.immconst_data[1],
                    ins.src[0].reg.u.immconst_data[2],
                    ins.src[0].reg.u.immconst_data[3]);
3028 3029 3030
        }
        else if (ins.handler_idx == WINED3DSIH_DEFB)
        {
3031
            shader_addline(&buffer, "defb b%u = %s",
3032
                    ins.dst[0].reg.idx[0].offset, ins.src[0].reg.u.immconst_data[0] ? "true" : "false");
3033 3034 3035 3036 3037
        }
        else
        {
            if (ins.predicate)
            {
3038 3039 3040
                shader_addline(&buffer, "(");
                shader_dump_src_param(&buffer, ins.predicate, &shader_version);
                shader_addline(&buffer, ") ");
3041 3042 3043
            }

            /* PixWin marks instructions with the coissue flag with a '+' */
3044 3045
            if (ins.coissue)
                shader_addline(&buffer, "+");
3046

3047
            shader_addline(&buffer, "%s", shader_opcode_names[ins.handler_idx]);
3048

3049
            if (ins.handler_idx == WINED3DSIH_BREAKP
3050
                    || ins.handler_idx == WINED3DSIH_CONTINUEP
3051
                    || ins.handler_idx == WINED3DSIH_IF
3052 3053
                    || ins.handler_idx == WINED3DSIH_RETP
                    || ins.handler_idx == WINED3DSIH_TEXKILL)
3054 3055 3056 3057 3058 3059 3060 3061 3062
            {
                switch (ins.flags)
                {
                    case WINED3D_SHADER_CONDITIONAL_OP_NZ: shader_addline(&buffer, "_nz"); break;
                    case WINED3D_SHADER_CONDITIONAL_OP_Z:  shader_addline(&buffer, "_z"); break;
                    default: shader_addline(&buffer, "_unrecognized(%#x)", ins.flags); break;
                }
            }
            else if (ins.handler_idx == WINED3DSIH_IFC
3063 3064 3065 3066
                    || ins.handler_idx == WINED3DSIH_BREAKC)
            {
                switch (ins.flags)
                {
3067 3068 3069 3070 3071 3072 3073
                    case WINED3D_SHADER_REL_OP_GT: shader_addline(&buffer, "_gt"); break;
                    case WINED3D_SHADER_REL_OP_EQ: shader_addline(&buffer, "_eq"); break;
                    case WINED3D_SHADER_REL_OP_GE: shader_addline(&buffer, "_ge"); break;
                    case WINED3D_SHADER_REL_OP_LT: shader_addline(&buffer, "_lt"); break;
                    case WINED3D_SHADER_REL_OP_NE: shader_addline(&buffer, "_ne"); break;
                    case WINED3D_SHADER_REL_OP_LE: shader_addline(&buffer, "_le"); break;
                    default: shader_addline(&buffer, "_(%u)", ins.flags);
3074 3075 3076 3077 3078 3079
                }
            }
            else if (ins.handler_idx == WINED3DSIH_TEX
                    && shader_version.major >= 2
                    && (ins.flags & WINED3DSI_TEXLD_PROJECT))
            {
3080
                shader_addline(&buffer, "p");
3081
            }
3082
            else if (ins.handler_idx == WINED3DSIH_RESINFO && ins.flags)
3083 3084 3085
            {
                switch (ins.flags)
                {
3086 3087 3088
                    case WINED3DSI_RESINFO_RCP_FLOAT: shader_addline(&buffer, "_rcpFloat"); break;
                    case WINED3DSI_RESINFO_UINT: shader_addline(&buffer, "_uint"); break;
                    default: shader_addline(&buffer, "_unrecognized(%#x)", ins.flags);
3089 3090
                }
            }
3091 3092 3093 3094 3095 3096 3097 3098
            else if (ins.handler_idx == WINED3DSIH_SAMPLE_INFO && ins.flags)
            {
                switch (ins.flags)
                {
                    case WINED3DSI_SAMPLE_INFO_UINT: shader_addline(&buffer, "_uint"); break;
                    default: shader_addline(&buffer, "_unrecognized(%#x)", ins.flags);
                }
            }
3099 3100 3101 3102
            else if (ins.handler_idx == WINED3DSIH_SYNC)
            {
                shader_dump_sync_flags(&buffer, ins.flags);
            }
3103 3104 3105 3106
            else
            {
                shader_dump_precise_flags(&buffer, ins.flags);
            }
3107

3108 3109 3110
            if (wined3d_shader_instruction_has_texel_offset(&ins))
                shader_addline(&buffer, "(%d,%d,%d)", ins.texel_offset.u, ins.texel_offset.v, ins.texel_offset.w);

3111
            for (i = 0; i < ins.dst_count; ++i)
3112
            {
3113 3114 3115
                shader_dump_ins_modifiers(&buffer, &ins.dst[i]);
                shader_addline(&buffer, !i ? " " : ", ");
                shader_dump_dst_param(&buffer, &ins.dst[i], &shader_version);
3116 3117 3118 3119 3120
            }

            /* Other source tokens */
            for (i = ins.dst_count; i < (ins.dst_count + ins.src_count); ++i)
            {
3121 3122
                shader_addline(&buffer, !i ? " " : ", ");
                shader_dump_src_param(&buffer, &ins.src[i - ins.dst_count], &shader_version);
3123 3124
            }
        }
3125
        shader_addline(&buffer, "\n");
3126
    }
3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137

    for (p = buffer.buffer; *p; p = q)
    {
        if (!(q = strstr(p, "\n")))
            q = p + strlen(p);
        else
            ++q;
        TRACE("    %.*s", (int)(q - p), p);
    }

    string_buffer_free(&buffer);
3138 3139
}

3140
static void shader_cleanup(struct wined3d_shader *shader)
3141
{
3142 3143
    if (shader->reg_maps.shader_version.type == WINED3D_SHADER_TYPE_HULL)
    {
3144 3145 3146
        heap_free(shader->u.hs.phases.control_point);
        heap_free(shader->u.hs.phases.fork);
        heap_free(shader->u.hs.phases.join);
3147 3148 3149
    }
    else if (shader->reg_maps.shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
    {
3150
        heap_free((void *)shader->u.gs.so_desc.elements);
3151
    }
3152

3153 3154 3155
    heap_free(shader->patch_constant_signature.elements);
    heap_free(shader->output_signature.elements);
    heap_free(shader->input_signature.elements);
3156
    shader->device->shader_backend->shader_destroy(shader);
3157
    shader_cleanup_reg_maps(&shader->reg_maps);
3158
    heap_free(shader->byte_code);
3159 3160 3161 3162 3163 3164 3165
    shader_delete_constant_list(&shader->constantsF);
    shader_delete_constant_list(&shader->constantsB);
    shader_delete_constant_list(&shader->constantsI);
    list_remove(&shader->shader_list_entry);

    if (shader->frontend && shader->frontend_data)
        shader->frontend->shader_free(shader->frontend_data);
3166 3167
}

3168 3169
struct shader_none_priv
{
3170
    const struct wined3d_vertex_pipe_ops *vertex_pipe;
3171
    const struct wined3d_fragment_pipe_ops *fragment_pipe;
3172
    BOOL ffp_proj_control;
3173 3174
};

3175
/* The following backend entry points have nothing to do for the "none"
 * backend and are therefore empty. */
static void shader_none_handle_instruction(const struct wined3d_shader_instruction *ins)
{
}

static void shader_none_precompile(void *shader_priv, struct wined3d_shader *shader)
{
}

static void shader_none_select_compute(void *shader_priv, struct wined3d_context *context,
        const struct wined3d_state *state)
{
}

static void shader_none_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count)
{
}

static void shader_none_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count)
{
}

static void shader_none_load_constants(void *shader_priv, struct wined3d_context *context,
        const struct wined3d_state *state)
{
}

static void shader_none_destroy(struct wined3d_shader *shader)
{
}

static void shader_none_free_context_data(struct wined3d_context *context)
{
}

static void shader_none_init_context_state(struct wined3d_context *context)
{
}
3186

3187
/* Context activation is done by the caller. */
3188
static void shader_none_select(void *shader_priv, struct wined3d_context *context,
3189
        const struct wined3d_state *state)
3190
{
3191
    struct shader_none_priv *priv = shader_priv;
3192

3193
    priv->vertex_pipe->vp_enable(context, !use_vs(state));
3194
    priv->fragment_pipe->fp_enable(context, !use_ps(state));
3195 3196
}

3197
/* Context activation is done by the caller. */
3198
static void shader_none_disable(void *shader_priv, struct wined3d_context *context)
3199 3200 3201
{
    struct shader_none_priv *priv = shader_priv;

3202
    priv->vertex_pipe->vp_enable(context, FALSE);
3203
    priv->fragment_pipe->fp_enable(context, FALSE);
3204

3205 3206
    context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL)
            | (1u << WINED3D_SHADER_TYPE_VERTEX)
3207
            | (1u << WINED3D_SHADER_TYPE_GEOMETRY)
3208
            | (1u << WINED3D_SHADER_TYPE_HULL)
3209 3210
            | (1u << WINED3D_SHADER_TYPE_DOMAIN)
            | (1u << WINED3D_SHADER_TYPE_COMPUTE);
3211 3212
}

3213
static HRESULT shader_none_alloc(struct wined3d_device *device, const struct wined3d_vertex_pipe_ops *vertex_pipe,
3214
        const struct wined3d_fragment_pipe_ops *fragment_pipe)
3215
{
3216
    struct fragment_caps fragment_caps;
3217
    void *vertex_priv, *fragment_priv;
3218 3219
    struct shader_none_priv *priv;

3220
    if (!(priv = heap_alloc(sizeof(*priv))))
3221 3222
        return E_OUTOFMEMORY;

3223 3224 3225
    if (!(vertex_priv = vertex_pipe->vp_alloc(&none_shader_backend, priv)))
    {
        ERR("Failed to initialize vertex pipe.\n");
3226
        heap_free(priv);
3227 3228 3229
        return E_FAIL;
    }

3230
    if (!(fragment_priv = fragment_pipe->alloc_private(&none_shader_backend, priv)))
3231
    {
3232
        ERR("Failed to initialize fragment pipe.\n");
3233
        vertex_pipe->vp_free(device, NULL);
3234
        heap_free(priv);
3235
        return E_FAIL;
3236 3237
    }

3238 3239
    priv->vertex_pipe = vertex_pipe;
    priv->fragment_pipe = fragment_pipe;
3240
    fragment_pipe->get_caps(device->adapter, &fragment_caps);
3241
    priv->ffp_proj_control = fragment_caps.wined3d_caps & WINED3D_FRAGMENT_CAP_PROJ_CONTROL;
3242 3243

    device->vertex_priv = vertex_priv;
3244
    device->fragment_priv = fragment_priv;
3245 3246 3247 3248 3249
    device->shader_priv = priv;

    return WINED3D_OK;
}

3250
static void shader_none_free(struct wined3d_device *device, struct wined3d_context *context)
3251 3252 3253
{
    struct shader_none_priv *priv = device->shader_priv;

3254 3255
    priv->fragment_pipe->free_private(device, context);
    priv->vertex_pipe->vp_free(device, context);
3256
    heap_free(priv);
3257 3258
}

3259 3260 3261 3262 3263
/* The "none" backend allocates nothing per context, so this always
 * succeeds. */
static BOOL shader_none_allocate_context_data(struct wined3d_context *context)
{
    const BOOL success = TRUE;

    return success;
}

3264
static void shader_none_get_caps(const struct wined3d_adapter *adapter, struct shader_caps *caps)
3265 3266
{
    /* Set the shader caps to 0 for the none shader backend */
3267
    memset(caps, 0, sizeof(*caps));
3268 3269 3270 3271
}

static BOOL shader_none_color_fixup_supported(struct color_fixup_desc fixup)
{
3272 3273 3274
    /* We "support" every possible fixup, since we don't support any shader
     * model, and will never have to actually sample a texture. */
    return TRUE;
3275 3276
}

3277 3278 3279 3280
static BOOL shader_none_has_ffp_proj_control(void *shader_priv)
{
    struct shader_none_priv *priv = shader_priv;

3281
    return priv->ffp_proj_control;
3282 3283
}

3284 3285
const struct wined3d_shader_backend_ops none_shader_backend =
{
3286
    shader_none_handle_instruction,
3287
    shader_none_precompile,
3288
    shader_none_select,
3289
    shader_none_select_compute,
3290
    shader_none_disable,
3291 3292 3293 3294 3295 3296
    shader_none_update_float_vertex_constants,
    shader_none_update_float_pixel_constants,
    shader_none_load_constants,
    shader_none_destroy,
    shader_none_alloc,
    shader_none_free,
3297 3298
    shader_none_allocate_context_data,
    shader_none_free_context_data,
3299
    shader_none_init_context_state,
3300 3301
    shader_none_get_caps,
    shader_none_color_fixup_supported,
3302
    shader_none_has_ffp_proj_control,
3303
};
3304

3305 3306 3307 3308 3309 3310 3311 3312 3313 3314
static unsigned int shader_max_version_from_feature_level(enum wined3d_feature_level level)
{
    switch (level)
    {
        case WINED3D_FEATURE_LEVEL_11_1:
        case WINED3D_FEATURE_LEVEL_11:
            return 5;
        case WINED3D_FEATURE_LEVEL_10_1:
        case WINED3D_FEATURE_LEVEL_10:
            return 4;
3315
        case WINED3D_FEATURE_LEVEL_9_3:
3316
            return 3;
3317
        case WINED3D_FEATURE_LEVEL_9_2:
3318 3319 3320 3321 3322 3323 3324 3325 3326
        case WINED3D_FEATURE_LEVEL_9_1:
            return 2;
        default:
            return 1;
    }
}

static HRESULT shader_set_function(struct wined3d_shader *shader, struct wined3d_device *device,
        enum wined3d_shader_type type, unsigned int float_const_count)
3327
{
3328
    const struct wined3d_d3d_info *d3d_info = &shader->device->adapter->d3d_info;
3329
    struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
3330
    const struct wined3d_shader_version *version = &reg_maps->shader_version;
3331
    const struct wined3d_shader_frontend *fe;
3332
    unsigned int backend_version;
3333
    HRESULT hr;
3334

3335 3336
    TRACE("shader %p, device %p, type %s, float_const_count %u.\n",
            shader, device, debug_shader_type(type), float_const_count);
3337

3338 3339 3340
    fe = shader->frontend;
    if (!(shader->frontend_data = fe->shader_init(shader->function,
            shader->functionLength, &shader->output_signature)))
3341 3342 3343 3344 3345 3346
    {
        FIXME("Failed to initialize frontend.\n");
        return WINED3DERR_INVALIDCALL;
    }

    /* First pass: trace shader. */
3347
    if (TRACE_ON(d3d_shader))
3348
        shader_trace_init(fe, shader->frontend_data);
3349 3350

    /* Second pass: figure out which registers are used, what the semantics are, etc. */
3351
    if (FAILED(hr = shader_get_registers_used(shader, float_const_count)))
3352
        return hr;
3353

3354
    if (version->type != type)
3355
    {
3356
        WARN("Wrong shader type %s.\n", debug_shader_type(reg_maps->shader_version.type));
3357 3358
        return WINED3DERR_INVALIDCALL;
    }
3359
    if (version->major > shader_max_version_from_feature_level(device->feature_level))
3360
    {
3361
        WARN("Shader version %u not supported by this device.\n", version->major);
3362 3363
        return WINED3DERR_INVALIDCALL;
    }
3364 3365 3366
    switch (type)
    {
        case WINED3D_SHADER_TYPE_VERTEX:
3367
            backend_version = d3d_info->limits.vs_version;
3368
            break;
3369 3370 3371
        case WINED3D_SHADER_TYPE_HULL:
            backend_version = d3d_info->limits.hs_version;
            break;
3372 3373 3374
        case WINED3D_SHADER_TYPE_DOMAIN:
            backend_version = d3d_info->limits.ds_version;
            break;
3375
        case WINED3D_SHADER_TYPE_GEOMETRY:
3376
            backend_version = d3d_info->limits.gs_version;
3377 3378
            break;
        case WINED3D_SHADER_TYPE_PIXEL:
3379
            backend_version = d3d_info->limits.ps_version;
3380
            break;
3381 3382 3383
        case WINED3D_SHADER_TYPE_COMPUTE:
            backend_version = d3d_info->limits.cs_version;
            break;
3384
        default:
3385
            FIXME("No backend version-checking for this shader type.\n");
3386 3387
            backend_version = 0;
    }
3388
    if (version->major > backend_version)
3389
    {
3390 3391
        WARN("Shader version %u.%u not supported by the current shader backend.\n",
                version->major, version->minor);
3392 3393
        return WINED3DERR_INVALIDCALL;
    }
3394

3395 3396
    shader->load_local_constsF = shader->lconst_inf_or_nan;

3397 3398 3399
    return WINED3D_OK;
}

3400
/* Atomically increment the shader's reference count and return the new
 * count. */
ULONG CDECL wined3d_shader_incref(struct wined3d_shader *shader)
{
    ULONG new_count;

    new_count = InterlockedIncrement(&shader->ref);
    TRACE("%p increasing refcount to %u.\n", shader, new_count);

    return new_count;
}

3409 3410 3411
static void wined3d_shader_init_object(void *object)
{
    struct wined3d_shader *shader = object;
3412 3413 3414
    struct wined3d_device *device = shader->device;

    list_add_head(&device->shaders, &shader->shader_list_entry);
3415

3416
    device->shader_backend->shader_precompile(device->shader_priv, shader);
3417 3418
}

3419 3420 3421
/* Clean up the shader passed as "object" and then free the shader
 * structure itself. */
static void wined3d_shader_destroy_object(void *object)
{
    struct wined3d_shader *dead_shader = object;

    shader_cleanup(dead_shader);
    heap_free(dead_shader);
}

3425
/* Atomically decrement the shader's reference count and return the new
 * count. When the count reaches zero the parent is notified and the
 * shader's destruction is handed to wined3d_cs_destroy_object(). */
ULONG CDECL wined3d_shader_decref(struct wined3d_shader *shader)
{
    ULONG remaining = InterlockedDecrement(&shader->ref);

    TRACE("%p decreasing refcount to %u.\n", shader, remaining);

    if (remaining)
        return remaining;

    shader->parent_ops->wined3d_object_destroyed(shader->parent);
    wined3d_cs_destroy_object(shader->device->cs, wined3d_shader_destroy_object, shader);

    return remaining;
}

3440
/* Return the parent pointer stored in the shader. */
void * CDECL wined3d_shader_get_parent(const struct wined3d_shader *shader)
{
    void *parent;

    TRACE("shader %p.\n", shader);

    parent = shader->parent;
    return parent;
}

3447 3448
/* Query the size of the shader's byte code, or copy the byte code out.
 *
 * byte_code:      destination buffer, or NULL to only query the size.
 * byte_code_size: with a NULL "byte_code", receives the byte code size;
 *                 otherwise holds the size of the destination buffer and is
 *                 left unmodified.
 *
 * Returns WINED3D_OK on success, or WINED3DERR_INVALIDCALL if the
 * destination buffer is too small. */
HRESULT CDECL wined3d_shader_get_byte_code(const struct wined3d_shader *shader,
        void *byte_code, UINT *byte_code_size)
{
    TRACE("shader %p, byte_code %p, byte_code_size %p.\n", shader, byte_code, byte_code_size);

    if (byte_code)
    {
        /* MSDN claims (for d3d8 at least) that if *byte_code_size is smaller
         * than the required size we should write the required size and
         * return D3DERR_MOREDATA. That's not actually true. */
        if (*byte_code_size < shader->byte_code_size)
            return WINED3DERR_INVALIDCALL;

        memcpy(byte_code, shader->byte_code, shader->byte_code_size);
        return WINED3D_OK;
    }

    *byte_code_size = shader->byte_code_size;
    return WINED3D_OK;
}

3471 3472
/* Set local constants for d3d8 shaders.
 *
 * shader:    shader to add the local float constants to.
 * start_idx: index of the first constant to set.
 * src_data:  source values, four floats per constant.
 * count:     number of constants to set.
 *
 * Constants at or beyond the shader's float constant limit are ignored,
 * with a warning. Each constant that is set is added to the head of
 * shader->constantsF, and shader->lconst_inf_or_nan is set if any of its
 * components is infinite or NaN.
 *
 * Returns WINED3D_OK on success or E_OUTOFMEMORY if an allocation fails;
 * constants added before the failure remain in the list. */
HRESULT CDECL wined3d_shader_set_local_constants_float(struct wined3d_shader *shader,
        UINT start_idx, const float *src_data, UINT count)
{
    UINT limit = shader->limits->constant_float;
    UINT end_idx;
    UINT i;

    TRACE("shader %p, start_idx %u, src_data %p, count %u.\n", shader, start_idx, src_data, count);

    /* Test "start_idx + count > limit" without forming the sum first, so
     * that the range is still clamped when the sum would wrap around. */
    if (count > limit || start_idx > limit - count)
    {
        WARN("start_idx %u + count %u > float constants limit %u.\n",
                start_idx, count, limit);
        end_idx = limit;
    }
    else
    {
        end_idx = start_idx + count;
    }

    for (i = start_idx; i < end_idx; ++i)
    {
        struct wined3d_shader_lconst *lconst;
        float *value;

        if (!(lconst = heap_alloc(sizeof(*lconst))))
            return E_OUTOFMEMORY;

        lconst->idx = i;
        value = (float *)lconst->value;
        memcpy(value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
        list_add_head(&shader->constantsF, &lconst->entry);

        if (isinf(value[0]) || isnan(value[0]) || isinf(value[1]) || isnan(value[1])
                || isinf(value[2]) || isnan(value[2]) || isinf(value[3]) || isnan(value[3]))
        {
            shader->lconst_inf_or_nan = TRUE;
        }
    }

    return WINED3D_OK;
}
3509

3510
/* Fill interpolation_args from the pixel shader's interpolation modes.
 *
 * The destination is zeroed instead when d3d_info->shader_output_interpolation
 * is not set, when no pixel shader is given, or when the pixel shader's major
 * version is below 4. The destination must be at least as large as
 * pixel_shader->u.ps.interpolation_mode. */
static void init_interpolation_compile_args(DWORD *interpolation_args,
        const struct wined3d_shader *pixel_shader, const struct wined3d_d3d_info *d3d_info)
{
    if (!d3d_info->shader_output_interpolation || !pixel_shader
            || pixel_shader->reg_maps.shader_version.major < 4)
    {
        /* pixel_shader may be NULL here; that is fine because the operand of
         * sizeof is not evaluated. */
        memset(interpolation_args, 0, sizeof(pixel_shader->u.ps.interpolation_mode));
        return;
    }

    memcpy(interpolation_args, pixel_shader->u.ps.interpolation_mode,
            sizeof(pixel_shader->u.ps.interpolation_mode));
}

3524
void find_vs_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
3525
        WORD swizzle_map, struct vs_compile_args *args, const struct wined3d_context *context)
3526
{
3527 3528 3529 3530 3531
    const struct wined3d_shader *geometry_shader = state->shader[WINED3D_SHADER_TYPE_GEOMETRY];
    const struct wined3d_shader *pixel_shader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
    const struct wined3d_shader *hull_shader = state->shader[WINED3D_SHADER_TYPE_HULL];
    const struct wined3d_d3d_info *d3d_info = context->d3d_info;

3532
    args->fog_src = state->render_states[WINED3D_RS_FOGTABLEMODE]
3533
            == WINED3D_FOG_NONE ? VS_FOG_COORD : VS_FOG_Z;
3534 3535
    args->clip_enabled = state->render_states[WINED3D_RS_CLIPPING]
            && state->render_states[WINED3D_RS_CLIPPLANEENABLE];
3536 3537
    args->point_size = state->gl_primitive_type == GL_POINTS;
    args->per_vertex_point_size = shader->reg_maps.point_size;
3538
    args->next_shader_type = hull_shader ? WINED3D_SHADER_TYPE_HULL
3539
            : geometry_shader ? WINED3D_SHADER_TYPE_GEOMETRY : WINED3D_SHADER_TYPE_PIXEL;
3540
    if (shader->reg_maps.shader_version.major >= 4)
3541 3542 3543
        args->next_shader_input_count = hull_shader ? hull_shader->limits->packed_input
                : geometry_shader ? geometry_shader->limits->packed_input
                : pixel_shader ? pixel_shader->limits->packed_input : 0;
3544 3545
    else
        args->next_shader_input_count = 0;
3546
    args->swizzle_map = swizzle_map;
3547 3548 3549
    if (d3d_info->emulated_flatshading)
        args->flatshading = state->render_states[WINED3D_RS_SHADEMODE] == WINED3D_SHADE_FLAT;
    else
3550
        args->flatshading = 0;
3551 3552

    init_interpolation_compile_args(args->interpolation_mode,
3553
            args->next_shader_type == WINED3D_SHADER_TYPE_PIXEL ? pixel_shader : NULL, d3d_info);
3554 3555 3556 3557
}

/* Compare two (usage, usage index) pairs.
 *
 * The pairs match when the usage indices are equal and the usages are either
 * equal or are WINED3D_DECL_USAGE_POSITION and WINED3D_DECL_USAGE_POSITIONT
 * in either order. Returns TRUE on a match, FALSE otherwise. */
static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2)
{
    if (usage_idx1 != usage_idx2)
        return FALSE;
    if (usage1 == usage2)
        return TRUE;
    if (usage1 == WINED3D_DECL_USAGE_POSITION && usage2 == WINED3D_DECL_USAGE_POSITIONT)
        return TRUE;
    if (usage2 == WINED3D_DECL_USAGE_POSITION && usage1 == WINED3D_DECL_USAGE_POSITIONT)
        return TRUE;

    return FALSE;
}

3570
/* Find the vertex shader input register declared with the requested usage.
 *
 * Walks the set bits of the shader's input register bitmap (as copied into a
 * WORD) and compares each declared attribute against usage_req /
 * usage_idx_req using match_usage(). On the first match the register number
 * is stored in *regnum and TRUE is returned; otherwise FALSE is returned and
 * *regnum is left untouched. */
BOOL vshader_get_input(const struct wined3d_shader *shader,
        BYTE usage_req, BYTE usage_idx_req, unsigned int *regnum)
{
    WORD map = shader->reg_maps.input_registers;
    unsigned int i;

    for (i = 0; map; map >>= 1, ++i)
    {
        if (!(map & 1))
            continue;

        if (match_usage(shader->u.vs.attributes[i].usage,
                shader->u.vs.attributes[i].usage_idx, usage_req, usage_idx_req))
        {
            *regnum = i;
            return TRUE;
        }
    }
    return FALSE;
}

3590
static HRESULT shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
3591
        const struct wined3d_shader_desc *desc, void *parent, const struct wined3d_parent_ops *parent_ops)
3592 3593 3594
{
    HRESULT hr;

3595
    TRACE("byte_code %p, byte_code_size %#lx.\n", desc->byte_code, (long)desc->byte_code_size);
3596

3597 3598 3599
    if (!desc->byte_code)
        return WINED3DERR_INVALIDCALL;

3600 3601 3602 3603 3604
    shader->ref = 1;
    shader->device = device;
    shader->parent = parent;
    shader->parent_ops = parent_ops;

3605
    list_init(&shader->linked_programs);
3606 3607 3608 3609 3610
    list_init(&shader->constantsF);
    list_init(&shader->constantsB);
    list_init(&shader->constantsI);
    shader->lconst_inf_or_nan = FALSE;
    list_init(&shader->reg_maps.indexable_temps);
3611
    list_init(&shader->shader_list_entry);
3612

3613
    if (desc->byte_code_size == ~(size_t)0)
3614
    {
3615 3616 3617 3618 3619 3620 3621
        struct wined3d_shader_version shader_version;
        const struct wined3d_shader_frontend *fe;
        struct wined3d_shader_instruction ins;
        const DWORD *ptr;
        void *fe_data;

        if (!(shader->frontend = shader_select_frontend(WINED3D_SHADER_BYTE_CODE_FORMAT_SM1)))
3622
        {
3623 3624
            FIXME("Unable to find frontend for shader.\n");
            hr = WINED3DERR_INVALIDCALL;
3625
            goto fail;
3626 3627
        }

3628 3629
        fe = shader->frontend;
        if (!(fe_data = fe->shader_init(desc->byte_code, desc->byte_code_size, &shader->output_signature)))
3630
        {
3631 3632 3633 3634
            WARN("Failed to initialise frontend data.\n");
            hr = WINED3DERR_INVALIDCALL;
            goto fail;
        }
3635

3636 3637 3638
        fe->shader_read_header(fe_data, &ptr, &shader_version);
        while (!fe->shader_is_end(fe_data, &ptr))
            fe->shader_read_instruction(fe_data, &ptr, &ins);
3639

3640
        fe->shader_free(fe_data);
3641

3642
        shader->byte_code_size = (ptr - desc->byte_code) * sizeof(*ptr);
3643

3644
        if (!(shader->byte_code = heap_alloc(shader->byte_code_size)))
3645 3646 3647 3648
        {
            hr = E_OUTOFMEMORY;
            goto fail;
        }
3649
        memcpy(shader->byte_code, desc->byte_code, shader->byte_code_size);
3650 3651 3652

        shader->function = shader->byte_code;
        shader->functionLength = shader->byte_code_size;
3653
    }
3654 3655 3656 3657
    else
    {
        enum wined3d_shader_byte_code_format format;
        unsigned int max_version;
3658

3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679
        if (!(shader->byte_code = heap_alloc(desc->byte_code_size)))
        {
            hr = E_OUTOFMEMORY;
            goto fail;
        }
        memcpy(shader->byte_code, desc->byte_code, desc->byte_code_size);
        shader->byte_code_size = desc->byte_code_size;

        max_version = shader_max_version_from_feature_level(device->feature_level);
        if (FAILED(hr = shader_extract_from_dxbc(shader, max_version, &format)))
            goto fail;

        if (!(shader->frontend = shader_select_frontend(format)))
        {
            FIXME("Unable to find frontend for shader.\n");
            hr = WINED3DERR_INVALIDCALL;
            goto fail;
        }
    }

    return WINED3D_OK;
3680 3681 3682 3683

fail:
    shader_cleanup(shader);
    return hr;
3684 3685
}

3686
static HRESULT vertex_shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
3687 3688 3689 3690 3691 3692
        const struct wined3d_shader_desc *desc, void *parent, const struct wined3d_parent_ops *parent_ops)
{
    struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
    unsigned int i;
    HRESULT hr;

3693
    if (FAILED(hr = shader_init(shader, device, desc, parent, parent_ops)))
3694 3695
        return hr;

3696 3697 3698 3699 3700 3701 3702
    if (FAILED(hr = shader_set_function(shader, device,
            WINED3D_SHADER_TYPE_VERTEX, device->adapter->d3d_info.limits.vs_uniform_count)))
    {
        shader_cleanup(shader);
        return hr;
    }

3703
    for (i = 0; i < shader->input_signature.element_count; ++i)
3704
    {
3705 3706
        const struct wined3d_shader_signature_element *input = &shader->input_signature.elements[i];

3707
        if (!(reg_maps->input_registers & (1u << input->register_idx)) || !input->semantic_name)
3708 3709
            continue;

3710 3711
        shader->u.vs.attributes[input->register_idx].usage =
                shader_usage_from_semantic_name(input->semantic_name);
3712
        shader->u.vs.attributes[input->register_idx].usage_idx = input->semantic_idx;
3713 3714
    }

3715 3716
    if (reg_maps->usesrelconstF && !list_empty(&shader->constantsF))
        shader->load_local_constsF = TRUE;
3717 3718 3719 3720

    return WINED3D_OK;
}

3721
static struct wined3d_shader_signature_element *shader_find_signature_element(const struct wined3d_shader_signature *s,
3722 3723 3724 3725 3726 3727 3728 3729
        unsigned int stream_idx, const char *semantic_name, unsigned int semantic_idx)
{
    struct wined3d_shader_signature_element *e = s->elements;
    unsigned int i;

    for (i = 0; i < s->element_count; ++i)
    {
        if (e[i].stream_idx == stream_idx
3730
                && !_strnicmp(e[i].semantic_name, semantic_name, -1)
3731 3732 3733 3734 3735 3736 3737
                && e[i].semantic_idx == semantic_idx)
            return &e[i];
    }

    return NULL;
}

3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759
/* Resolve a stream output element to an output register and component.
 *
 * The element is matched against the shader's output signature by stream
 * index, semantic name and semantic index. On success *register_idx receives
 * the signature element's register and *component_idx receives the index of
 * the lowest component set in the element's mask (4 if none of the low four
 * bits is set) plus so_element->component_idx. Returns FALSE, leaving the
 * outputs untouched, when no signature element matches. */
BOOL shader_get_stream_output_register_info(const struct wined3d_shader *shader,
        const struct wined3d_stream_output_element *so_element, unsigned int *register_idx, unsigned int *component_idx)
{
    const struct wined3d_shader_signature_element *element;
    unsigned int first_component = 0;

    element = shader_find_signature_element(&shader->output_signature,
            so_element->stream_idx, so_element->semantic_name, so_element->semantic_idx);
    if (!element)
        return FALSE;

    /* Advance to the first component present in the write mask. */
    while (first_component < 4 && !(element->mask & (1u << first_component)))
        ++first_component;

    *register_idx = element->register_idx;
    *component_idx = first_component + so_element->component_idx;
    return TRUE;
}

3760 3761
static HRESULT geometry_shader_init_stream_output(struct wined3d_shader *shader,
        const struct wined3d_stream_output_desc *so_desc)
3762
{
3763
    const struct wined3d_shader_frontend *fe = shader->frontend;
3764
    const struct wined3d_shader_signature_element *output;
3765
    unsigned int i, component_idx, register_idx, mask;
3766
    struct wined3d_stream_output_element *elements;
3767 3768 3769
    struct wined3d_shader_version shader_version;
    const DWORD *ptr;
    void *fe_data;
3770
    HRESULT hr;
3771

3772 3773 3774 3775
    if (!so_desc)
        return WINED3D_OK;

    if (!(fe_data = fe->shader_init(shader->function, shader->functionLength, &shader->output_signature)))
3776
    {
3777 3778 3779 3780 3781 3782 3783 3784 3785
        WARN("Failed to initialise frontend data.\n");
        return WINED3DERR_INVALIDCALL;
    }
    fe->shader_read_header(fe_data, &ptr, &shader_version);
    fe->shader_free(fe_data);

    switch (shader_version.type)
    {
        case WINED3D_SHADER_TYPE_VERTEX:
3786
        case WINED3D_SHADER_TYPE_DOMAIN:
3787 3788 3789 3790 3791 3792 3793 3794
            shader->function = NULL;
            shader->functionLength = 0;
            break;
        case WINED3D_SHADER_TYPE_GEOMETRY:
            break;
        default:
            WARN("Wrong shader type %s.\n", debug_shader_type(shader_version.type));
            return E_INVALIDARG;
3795 3796
    }

3797 3798 3799 3800 3801 3802 3803 3804 3805
    if (!shader->function)
    {
        shader->reg_maps.shader_version = shader_version;
        shader->reg_maps.shader_version.type = WINED3D_SHADER_TYPE_GEOMETRY;
        shader_set_limits(shader);
        if (FAILED(hr = shader_scan_output_signature(shader)))
            return hr;
    }

3806 3807 3808 3809 3810 3811 3812
    if (!(elements = heap_calloc(so_desc->element_count, sizeof(*elements))))
        return E_OUTOFMEMORY;

    shader->u.gs.so_desc = *so_desc;
    shader->u.gs.so_desc.elements = elements;
    memcpy(elements, so_desc->elements, so_desc->element_count * sizeof(*elements));

3813 3814 3815 3816 3817 3818 3819
    for (i = 0; i < so_desc->element_count; ++i)
    {
        struct wined3d_stream_output_element *e = &elements[i];

        if (!e->semantic_name)
            continue;
        if (!(output = shader_find_signature_element(&shader->output_signature,
3820 3821
                e->stream_idx, e->semantic_name, e->semantic_idx))
                || !shader_get_stream_output_register_info(shader, e, &register_idx, &component_idx))
3822 3823 3824 3825 3826 3827 3828
        {
            WARN("Failed to find output signature element for stream output entry.\n");
            return E_INVALIDARG;
        }

        e->semantic_name = output->semantic_name;

3829
        mask = ((1u << e->component_count) - 1) << component_idx;
3830 3831 3832
        if ((output->mask & 0xff & mask) != mask)
        {
            WARN("Invalid component range %u-%u (mask %#x), output mask %#x.\n",
3833
                    component_idx, e->component_count, mask, output->mask & 0xff);
3834 3835 3836 3837
            return E_INVALIDARG;
        }
    }

3838 3839 3840 3841 3842 3843 3844 3845 3846 3847
    return WINED3D_OK;
}

static HRESULT geometry_shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
        const struct wined3d_shader_desc *desc, const struct wined3d_stream_output_desc *so_desc,
        void *parent, const struct wined3d_parent_ops *parent_ops)
{
    HRESULT hr;

    if (FAILED(hr = shader_init(shader, device, desc, parent, parent_ops)))
3848 3849
        return hr;

3850 3851 3852
    if (FAILED(hr = geometry_shader_init_stream_output(shader, so_desc)))
        goto fail;

3853 3854 3855
    if (shader->function
            && FAILED(hr = shader_set_function(shader, device, WINED3D_SHADER_TYPE_GEOMETRY, 0)))
        goto fail;
3856 3857

    return WINED3D_OK;
3858 3859 3860 3861

fail:
    shader_cleanup(shader);
    return hr;
3862 3863
}

3864 3865 3866
void find_ds_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
        struct ds_compile_args *args, const struct wined3d_context *context)
{
3867 3868
    const struct wined3d_shader *geometry_shader = state->shader[WINED3D_SHADER_TYPE_GEOMETRY];
    const struct wined3d_shader *pixel_shader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
3869 3870 3871 3872 3873
    const struct wined3d_shader *hull_shader = state->shader[WINED3D_SHADER_TYPE_HULL];

    args->tessellator_output_primitive = hull_shader->u.hs.tessellator_output_primitive;
    args->tessellator_partitioning = hull_shader->u.hs.tessellator_partitioning;

3874 3875 3876
    args->output_count = geometry_shader ? geometry_shader->limits->packed_input
            : pixel_shader ? pixel_shader->limits->packed_input : shader->limits->packed_output;
    args->next_shader_type = geometry_shader ? WINED3D_SHADER_TYPE_GEOMETRY : WINED3D_SHADER_TYPE_PIXEL;
3877 3878

    args->render_offscreen = context->render_offscreen;
3879

3880
    init_interpolation_compile_args(args->interpolation_mode,
3881
            args->next_shader_type == WINED3D_SHADER_TYPE_PIXEL ? pixel_shader : NULL, context->d3d_info);
3882

3883
    args->padding = 0;
3884 3885
}

3886
void find_gs_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
3887
        struct gs_compile_args *args, const struct wined3d_context *context)
3888
{
3889 3890 3891 3892
    const struct wined3d_shader *pixel_shader = state->shader[WINED3D_SHADER_TYPE_PIXEL];

    args->output_count = pixel_shader ? pixel_shader->limits->packed_input : shader->limits->packed_output;

3893 3894 3895
    if (!(args->primitive_type = shader->u.gs.input_type))
        args->primitive_type = d3d_primitive_type_from_gl(state->gl_primitive_type);

3896
    init_interpolation_compile_args(args->interpolation_mode, pixel_shader, context->d3d_info);
3897 3898
}

3899
void find_ps_compile_args(const struct wined3d_state *state, const struct wined3d_shader *shader,
3900
        BOOL position_transformed, struct ps_compile_args *args, const struct wined3d_context *context)
3901
{
3902
    const struct wined3d_gl_info *gl_info = &context->device->adapter->gl_info;
3903
    const struct wined3d_d3d_info *d3d_info = context->d3d_info;
3904
    struct wined3d_texture *texture;
3905
    unsigned int i;
3906 3907

    memset(args, 0, sizeof(*args)); /* FIXME: Make sure all bits are set. */
3908
    if (!d3d_info->srgb_write_control && needs_srgb_write(context, state, state->fb))
3909
    {
3910
        static unsigned int warned = 0;
3911

3912 3913 3914 3915
        args->srgb_correction = 1;
        if (state->render_states[WINED3D_RS_ALPHABLENDENABLE] && !warned++)
            WARN("Blending into a sRGB render target with no GL_ARB_framebuffer_sRGB "
                    "support, expect rendering artifacts.\n");
3916 3917
    }

3918 3919
    if (shader->reg_maps.shader_version.major == 1
            && shader->reg_maps.shader_version.minor <= 3)
3920
    {
3921
        for (i = 0; i < shader->limits->sampler; ++i)
3922
        {
3923
            DWORD flags = state->texture_states[i][WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS];
3924

3925
            if (flags & WINED3D_TTFF_PROJECTED)
3926
            {
3927
                DWORD tex_transform = flags & ~WINED3D_TTFF_PROJECTED;
3928

3929
                if (!state->shader[WINED3D_SHADER_TYPE_VERTEX])
3930
                {
3931
                    enum wined3d_shader_resource_type resource_type = shader->reg_maps.resource_info[i].type;
3932
                    unsigned int j;
3933
                    unsigned int index = state->texture_states[i][WINED3D_TSS_TEXCOORD_INDEX];
3934 3935
                    DWORD max_valid = WINED3D_TTFF_COUNT4;

3936 3937 3938 3939 3940
                    for (j = 0; j < state->vertex_declaration->element_count; ++j)
                    {
                        struct wined3d_vertex_declaration_element *element =
                                &state->vertex_declaration->elements[j];

3941
                        if (element->usage == WINED3D_DECL_USAGE_TEXCOORD
3942 3943 3944 3945 3946 3947
                                && element->usage_idx == index)
                        {
                            max_valid = element->format->component_count;
                            break;
                        }
                    }
3948 3949 3950 3951 3952 3953
                    if (!tex_transform || tex_transform > max_valid)
                    {
                        WARN("Fixing up projected texture transform flags from %#x to %#x.\n",
                                tex_transform, max_valid);
                        tex_transform = max_valid;
                    }
3954 3955 3956 3957 3958
                    if ((resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_1D && tex_transform > WINED3D_TTFF_COUNT1)
                            || (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_2D
                            && tex_transform > WINED3D_TTFF_COUNT2)
                            || (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_3D
                            && tex_transform > WINED3D_TTFF_COUNT3))
3959 3960 3961 3962 3963
                        tex_transform |= WINED3D_PSARGS_PROJECTED;
                    else
                    {
                        WARN("Application requested projected texture with unsuitable texture coordinates.\n");
                        WARN("(texture unit %u, transform flags %#x, sampler type %u).\n",
3964
                                i, tex_transform, resource_type);
3965
                    }
3966
                }
3967
                else
3968
                    tex_transform = WINED3D_TTFF_COUNT4 | WINED3D_PSARGS_PROJECTED;
3969 3970

                args->tex_transform |= tex_transform << i * WINED3D_PSARGS_TEXTRANSFORM_SHIFT;
3971
            }
3972 3973
        }
    }
3974 3975 3976
    if (shader->reg_maps.shader_version.major == 1
            && shader->reg_maps.shader_version.minor <= 4)
    {
3977
        for (i = 0; i < shader->limits->sampler; ++i)
3978
        {
3979
            if (!shader->reg_maps.resource_info[i].type)
3980 3981 3982 3983 3984
                continue;

            /* Treat unbound textures as 2D. The dummy texture will provide
             * the proper sample value. The tex_types bitmap defaults to
             * 2D because of the memset. */
3985
            if (!(texture = state->textures[i]))
3986 3987
                continue;

3988
            switch (wined3d_texture_gl(texture)->target)
3989 3990
            {
                /* RECT textures are distinguished from 2D textures via np2_fixup */
3991
                default:
3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003
                    break;

                case GL_TEXTURE_3D:
                    args->tex_types |= WINED3D_SHADER_TEX_3D << i * WINED3D_PSARGS_TEXTYPE_SHIFT;
                    break;

                case GL_TEXTURE_CUBE_MAP_ARB:
                    args->tex_types |= WINED3D_SHADER_TEX_CUBE << i * WINED3D_PSARGS_TEXTYPE_SHIFT;
                    break;
            }
        }
    }
4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032
    else if (shader->reg_maps.shader_version.major <= 3)
    {
        for (i = 0; i < shader->limits->sampler; ++i)
        {
            enum wined3d_shader_resource_type resource_type;
            enum wined3d_shader_tex_types tex_type;

            if (!(resource_type = shader->reg_maps.resource_info[i].type))
                continue;

            switch (resource_type)
            {
                case WINED3D_SHADER_RESOURCE_TEXTURE_3D:
                    tex_type = WINED3D_SHADER_TEX_3D;
                    break;
                case WINED3D_SHADER_RESOURCE_TEXTURE_CUBE:
                    tex_type = WINED3D_SHADER_TEX_CUBE;
                    break;
                default:
                    tex_type = WINED3D_SHADER_TEX_2D;
                    break;
            }

            if ((texture = state->textures[i]))
            {
                if (texture->resource.type == WINED3D_RTYPE_TEXTURE_2D
                        && resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_3D
                        && !(texture->resource.usage & WINED3DUSAGE_LEGACY_CUBEMAP))
                    tex_type = WINED3D_SHADER_TEX_2D;
4033 4034 4035
                else if (texture->resource.type == WINED3D_RTYPE_TEXTURE_3D
                        && resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_2D)
                    tex_type = WINED3D_SHADER_TEX_3D;
4036 4037 4038 4039
            }
            args->tex_types |= tex_type << i * WINED3D_PSARGS_TEXTYPE_SHIFT;
        }
    }
4040

4041
    if (shader->reg_maps.shader_version.major >= 4)
4042
    {
4043 4044
        /* In SM4+ we use dcl_sampler in order to determine if we should use shadow sampler. */
        args->shadow = 0;
4045
        for (i = 0 ; i < WINED3D_MAX_FRAGMENT_SAMPLERS; ++i)
4046
            args->color_fixup[i] = COLOR_FIXUP_IDENTITY;
4047 4048 4049 4050
        args->np2_fixup = 0;
    }
    else
    {
4051
        for (i = 0; i < WINED3D_MAX_FRAGMENT_SAMPLERS; ++i)
4052 4053 4054 4055 4056 4057 4058 4059 4060 4061
        {
            if (!shader->reg_maps.resource_info[i].type)
                continue;

            texture = state->textures[i];
            if (!texture)
            {
                args->color_fixup[i] = COLOR_FIXUP_IDENTITY;
                continue;
            }
4062
            if (can_use_texture_swizzle(d3d_info, texture->resource.format))
4063 4064 4065
                args->color_fixup[i] = COLOR_FIXUP_IDENTITY;
            else
                args->color_fixup[i] = texture->resource.format->color_fixup;
4066

4067 4068
            if (texture->resource.format_flags & WINED3DFMT_FLAG_SHADOW)
                args->shadow |= 1u << i;
4069

4070 4071 4072 4073
            /* Flag samplers that need NP2 texcoord fixup. */
            if (!(texture->flags & WINED3D_TEXTURE_POW2_MAT_IDENT))
                args->np2_fixup |= (1u << i);
        }
4074
    }
4075

4076
    if (shader->reg_maps.shader_version.major >= 3)
4077
    {
4078
        if (position_transformed)
4079
            args->vp_mode = WINED3D_VP_MODE_NONE;
4080
        else if (use_vs(state))
4081
            args->vp_mode = WINED3D_VP_MODE_SHADER;
4082
        else
4083
            args->vp_mode = WINED3D_VP_MODE_FF;
4084
        args->fog = WINED3D_FFP_PS_FOG_OFF;
4085 4086 4087
    }
    else
    {
4088
        args->vp_mode = WINED3D_VP_MODE_SHADER;
4089
        if (state->render_states[WINED3D_RS_FOGENABLE])
4090
        {
4091
            switch (state->render_states[WINED3D_RS_FOGTABLEMODE])
4092
            {
4093
                case WINED3D_FOG_NONE:
4094
                    if (position_transformed || use_vs(state))
4095
                    {
4096
                        args->fog = WINED3D_FFP_PS_FOG_LINEAR;
4097 4098 4099
                        break;
                    }

4100
                    switch (state->render_states[WINED3D_RS_FOGVERTEXMODE])
4101
                    {
4102
                        case WINED3D_FOG_NONE: /* Fall through. */
4103 4104 4105
                        case WINED3D_FOG_LINEAR: args->fog = WINED3D_FFP_PS_FOG_LINEAR; break;
                        case WINED3D_FOG_EXP:    args->fog = WINED3D_FFP_PS_FOG_EXP;    break;
                        case WINED3D_FOG_EXP2:   args->fog = WINED3D_FFP_PS_FOG_EXP2;   break;
4106 4107 4108
                    }
                    break;

4109 4110 4111
                case WINED3D_FOG_LINEAR: args->fog = WINED3D_FFP_PS_FOG_LINEAR; break;
                case WINED3D_FOG_EXP:    args->fog = WINED3D_FFP_PS_FOG_EXP;    break;
                case WINED3D_FOG_EXP2:   args->fog = WINED3D_FFP_PS_FOG_EXP2;   break;
4112 4113 4114 4115
            }
        }
        else
        {
4116
            args->fog = WINED3D_FFP_PS_FOG_OFF;
4117 4118
        }
    }
4119

4120
    if (!d3d_info->full_ffp_varyings)
4121 4122 4123 4124
    {
        const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX];

        args->texcoords_initialized = 0;
4125
        for (i = 0; i < WINED3D_MAX_TEXTURES; ++i)
4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138
        {
            if (vs)
            {
                if (state->shader[WINED3D_SHADER_TYPE_VERTEX]->reg_maps.output_registers & (1u << i))
                    args->texcoords_initialized |= 1u << i;
            }
            else
            {
                const struct wined3d_stream_info *si = &context->stream_info;
                unsigned int coord_idx = state->texture_states[i][WINED3D_TSS_TEXCOORD_INDEX];

                if ((state->texture_states[i][WINED3D_TSS_TEXCOORD_INDEX] >> WINED3D_FFP_TCI_SHIFT)
                        & WINED3D_FFP_TCI_MASK
4139
                        || (coord_idx < WINED3D_MAX_TEXTURES && (si->use_map & (1u << (WINED3D_FFP_TEXCOORD0 + coord_idx)))))
4140 4141 4142 4143 4144 4145
                    args->texcoords_initialized |= 1u << i;
            }
        }
    }
    else
    {
4146
        args->texcoords_initialized = (1u << WINED3D_MAX_TEXTURES) - 1;
4147 4148
    }

4149 4150
    args->pointsprite = state->render_states[WINED3D_RS_POINTSPRITEENABLE]
            && state->gl_primitive_type == GL_POINTS;
4151

4152
    if (d3d_info->ffp_alpha_test)
4153 4154 4155 4156 4157 4158
        args->alpha_test_func = WINED3D_CMP_ALWAYS - 1;
    else
        args->alpha_test_func = (state->render_states[WINED3D_RS_ALPHATESTENABLE]
                ? wined3d_sanitize_cmp_func(state->render_states[WINED3D_RS_ALPHAFUNC])
                : WINED3D_CMP_ALWAYS) - 1;

4159 4160
    if (d3d_info->emulated_flatshading)
        args->flatshading = state->render_states[WINED3D_RS_SHADEMODE] == WINED3D_SHADE_FLAT;
4161

4162 4163
    args->render_offscreen = shader->reg_maps.vpos && gl_info->supported[ARB_FRAGMENT_COORD_CONVENTIONS]
            ? context->render_offscreen : 0;
4164

4165
    for (i = 0; i < ARRAY_SIZE(state->fb->render_targets); ++i)
4166
    {
4167 4168 4169
        struct wined3d_rendertarget_view *rtv = state->fb->render_targets[i];
        if (rtv && rtv->format->id == WINED3DFMT_A8_UNORM && !is_identity_fixup(rtv->format->color_fixup))
            args->rt_alpha_swizzle |= 1u << i;
4170
    }
4171 4172
}

4173
static HRESULT pixel_shader_init(struct wined3d_shader *shader, struct wined3d_device *device,
4174
        const struct wined3d_shader_desc *desc, void *parent, const struct wined3d_parent_ops *parent_ops)
4175 4176 4177 4178
{
    const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
    unsigned int i, highest_reg_used = 0, num_regs_used = 0;
    HRESULT hr;
4179

4180
    if (FAILED(hr = shader_init(shader, device, desc, parent, parent_ops)))
4181
        return hr;
4182

4183 4184 4185 4186 4187 4188 4189
    if (FAILED(hr = shader_set_function(shader, device,
            WINED3D_SHADER_TYPE_PIXEL, device->adapter->d3d_info.limits.ps_uniform_count)))
    {
        shader_cleanup(shader);
        return hr;
    }

4190 4191
    for (i = 0; i < MAX_REG_INPUT; ++i)
    {
4192
        if (shader->u.ps.input_reg_used & (1u << i))
4193 4194 4195 4196 4197 4198 4199 4200 4201
        {
            ++num_regs_used;
            highest_reg_used = i;
        }
    }

    /* Don't do any register mapping magic if it is not needed, or if we can't
     * achieve anything anyway */
    if (highest_reg_used < (gl_info->limits.glsl_varyings / 4)
4202 4203
            || num_regs_used > (gl_info->limits.glsl_varyings / 4)
            || shader->reg_maps.shader_version.major >= 4)
4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214
    {
        if (num_regs_used > (gl_info->limits.glsl_varyings / 4))
        {
            /* This happens with relative addressing. The input mapper function
             * warns about this if the higher registers are declared too, so
             * don't write a FIXME here */
            WARN("More varying registers used than supported\n");
        }

        for (i = 0; i < MAX_REG_INPUT; ++i)
        {
4215
            shader->u.ps.input_reg_map[i] = i;
4216 4217
        }

4218
        shader->u.ps.declared_in_count = highest_reg_used + 1;
4219 4220 4221
    }
    else
    {
4222
        shader->u.ps.declared_in_count = 0;
4223 4224
        for (i = 0; i < MAX_REG_INPUT; ++i)
        {
4225
            if (shader->u.ps.input_reg_used & (1u << i))
4226 4227
                shader->u.ps.input_reg_map[i] = shader->u.ps.declared_in_count++;
            else shader->u.ps.input_reg_map[i] = ~0U;
4228 4229 4230 4231 4232 4233
        }
    }

    return WINED3D_OK;
}

4234
/* Determine the resource type of resource "resource_idx" for a pixel shader.
 *
 * For shader model 4 and above the type recorded in "reg_maps" is returned
 * as is. For earlier shader models a resource without a recorded type yields
 * 0; otherwise the type is decoded from the per-resource field packed into
 * "tex_types". Fields are WINED3D_PSARGS_TEXTYPE_SHIFT bits apart and are
 * extracted with WINED3D_PSARGS_TEXTYPE_MASK. */
enum wined3d_shader_resource_type pixelshader_get_resource_type(const struct wined3d_shader_reg_maps *reg_maps,
        unsigned int resource_idx, DWORD tex_types)
{
    /* Read-only lookup table, indexed by the decoded texture type field. */
    static const enum wined3d_shader_resource_type shader_resource_type_from_shader_tex_types[] =
    {
        WINED3D_SHADER_RESOURCE_TEXTURE_2D,     /* WINED3D_SHADER_TEX_2D     */
        WINED3D_SHADER_RESOURCE_TEXTURE_3D,     /* WINED3D_SHADER_TEX_3D     */
        WINED3D_SHADER_RESOURCE_TEXTURE_CUBE,   /* WINED3D_SHADER_TEX_CUBE   */
    };

    unsigned int idx;

    if (reg_maps->shader_version.major > 3)
        return reg_maps->resource_info[resource_idx].type;

    if (!reg_maps->resource_info[resource_idx].type)
        return 0;

    /* Multiplication already binds tighter than the shift; the parentheses
     * only make the intended grouping explicit. */
    idx = (tex_types >> (resource_idx * WINED3D_PSARGS_TEXTYPE_SHIFT)) & WINED3D_PSARGS_TEXTYPE_MASK;
    /* The masked field may be able to encode more values than the table has
     * entries, hence the bounds assertion before indexing. */
    assert(idx < ARRAY_SIZE(shader_resource_type_from_shader_tex_types));
    return shader_resource_type_from_shader_tex_types[idx];
}
4256

4257 4258 4259 4260 4261 4262 4263 4264 4265
/* Create a compute shader object.
 *
 * Allocates a zeroed wined3d_shader, runs shader_init() and
 * shader_set_function() on it, and queues wined3d_shader_init_object for it
 * on the device's command stream. On success the new object is stored in
 * "*shader" and WINED3D_OK is returned. On failure the allocation is released,
 * "*shader" is left untouched and the failing HRESULT is returned. */
HRESULT CDECL wined3d_shader_create_cs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
        void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
{
    struct wined3d_shader *cs_shader;
    HRESULT result;

    TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
            device, desc, parent, parent_ops, shader);

    cs_shader = heap_alloc_zero(sizeof(*cs_shader));
    if (!cs_shader)
        return E_OUTOFMEMORY;

    result = shader_init(cs_shader, device, desc, parent, parent_ops);
    if (FAILED(result))
    {
        WARN("Failed to initialize compute shader, hr %#x.\n", result);
        goto free_object;
    }

    result = shader_set_function(cs_shader, device, WINED3D_SHADER_TYPE_COMPUTE, 0);
    if (FAILED(result))
    {
        /* shader_init() succeeded, so undo it before freeing. */
        shader_cleanup(cs_shader);
        goto free_object;
    }

    wined3d_cs_init_object(device->cs, wined3d_shader_init_object, cs_shader);

    TRACE("Created compute shader %p.\n", cs_shader);
    *shader = cs_shader;

    return WINED3D_OK;

free_object:
    heap_free(cs_shader);
    return result;
}

4291 4292 4293 4294 4295 4296 4297 4298 4299
/* Create a domain shader object.
 *
 * Allocates a zeroed wined3d_shader, runs shader_init() and
 * shader_set_function() on it, and queues wined3d_shader_init_object for it
 * on the device's command stream. On success the new object is stored in
 * "*shader" and WINED3D_OK is returned. On failure the allocation is released,
 * "*shader" is left untouched and the failing HRESULT is returned. */
HRESULT CDECL wined3d_shader_create_ds(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
        void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
{
    struct wined3d_shader *ds_shader;
    HRESULT result;

    TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
            device, desc, parent, parent_ops, shader);

    ds_shader = heap_alloc_zero(sizeof(*ds_shader));
    if (!ds_shader)
        return E_OUTOFMEMORY;

    result = shader_init(ds_shader, device, desc, parent, parent_ops);
    if (FAILED(result))
    {
        WARN("Failed to initialize domain shader, hr %#x.\n", result);
        goto free_object;
    }

    result = shader_set_function(ds_shader, device, WINED3D_SHADER_TYPE_DOMAIN, 0);
    if (FAILED(result))
    {
        /* shader_init() succeeded, so undo it before freeing. */
        shader_cleanup(ds_shader);
        goto free_object;
    }

    wined3d_cs_init_object(device->cs, wined3d_shader_init_object, ds_shader);

    TRACE("Created domain shader %p.\n", ds_shader);
    *shader = ds_shader;

    return WINED3D_OK;

free_object:
    heap_free(ds_shader);
    return result;
}

4325
/* Create a geometry shader object.
 *
 * Allocates a zeroed wined3d_shader, initialises it through
 * geometry_shader_init() (which also receives the stream output description
 * "so_desc"), and queues wined3d_shader_init_object for it on the device's
 * command stream. On success the new object is stored in "*shader" and
 * WINED3D_OK is returned. On failure the allocation is released, "*shader"
 * is left untouched and the failing HRESULT is returned. */
HRESULT CDECL wined3d_shader_create_gs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
        const struct wined3d_stream_output_desc *so_desc, void *parent,
        const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
{
    struct wined3d_shader *gs_shader;
    HRESULT result;

    TRACE("device %p, desc %p, so_desc %p, parent %p, parent_ops %p, shader %p.\n",
            device, desc, so_desc, parent, parent_ops, shader);

    gs_shader = heap_alloc_zero(sizeof(*gs_shader));
    if (!gs_shader)
        return E_OUTOFMEMORY;

    result = geometry_shader_init(gs_shader, device, desc, so_desc, parent, parent_ops);
    if (FAILED(result))
    {
        WARN("Failed to initialize geometry shader, hr %#x.\n", result);
        heap_free(gs_shader);
        return result;
    }

    wined3d_cs_init_object(device->cs, wined3d_shader_init_object, gs_shader);

    TRACE("Created geometry shader %p.\n", gs_shader);
    *shader = gs_shader;

    return WINED3D_OK;
}

4353 4354 4355 4356 4357 4358 4359 4360 4361
/* Create a hull shader object.
 *
 * Allocates a zeroed wined3d_shader, runs shader_init() and
 * shader_set_function() on it, and queues wined3d_shader_init_object for it
 * on the device's command stream. On success the new object is stored in
 * "*shader" and WINED3D_OK is returned. On failure the allocation is released,
 * "*shader" is left untouched and the failing HRESULT is returned. */
HRESULT CDECL wined3d_shader_create_hs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
        void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
{
    struct wined3d_shader *hs_shader;
    HRESULT result;

    TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
            device, desc, parent, parent_ops, shader);

    hs_shader = heap_alloc_zero(sizeof(*hs_shader));
    if (!hs_shader)
        return E_OUTOFMEMORY;

    result = shader_init(hs_shader, device, desc, parent, parent_ops);
    if (FAILED(result))
    {
        WARN("Failed to initialize hull shader, hr %#x.\n", result);
        goto free_object;
    }

    result = shader_set_function(hs_shader, device, WINED3D_SHADER_TYPE_HULL, 0);
    if (FAILED(result))
    {
        /* shader_init() succeeded, so undo it before freeing. */
        shader_cleanup(hs_shader);
        goto free_object;
    }

    wined3d_cs_init_object(device->cs, wined3d_shader_init_object, hs_shader);

    TRACE("Created hull shader %p.\n", hs_shader);
    *shader = hs_shader;

    return WINED3D_OK;

free_object:
    heap_free(hs_shader);
    return result;
}

4387 4388
/* Create a pixel shader object.
 *
 * Allocates a zeroed wined3d_shader, initialises it through
 * pixel_shader_init(), and queues wined3d_shader_init_object for it on the
 * device's command stream. On success the new object is stored in "*shader"
 * and WINED3D_OK is returned. On failure the allocation is released,
 * "*shader" is left untouched and the failing HRESULT is returned. */
HRESULT CDECL wined3d_shader_create_ps(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
        void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
{
    struct wined3d_shader *ps_shader;
    HRESULT result;

    TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
            device, desc, parent, parent_ops, shader);

    ps_shader = heap_alloc_zero(sizeof(*ps_shader));
    if (!ps_shader)
        return E_OUTOFMEMORY;

    result = pixel_shader_init(ps_shader, device, desc, parent, parent_ops);
    if (FAILED(result))
    {
        WARN("Failed to initialize pixel shader, hr %#x.\n", result);
        heap_free(ps_shader);
        return result;
    }

    wined3d_cs_init_object(device->cs, wined3d_shader_init_object, ps_shader);

    TRACE("Created pixel shader %p.\n", ps_shader);
    *shader = ps_shader;

    return WINED3D_OK;
}

4414 4415
/* Create a vertex shader object.
 *
 * Allocates a zeroed wined3d_shader, initialises it through
 * vertex_shader_init(), and queues wined3d_shader_init_object for it on the
 * device's command stream. On success the new object is stored in "*shader"
 * and WINED3D_OK is returned. On failure the allocation is released,
 * "*shader" is left untouched and the failing HRESULT is returned. */
HRESULT CDECL wined3d_shader_create_vs(struct wined3d_device *device, const struct wined3d_shader_desc *desc,
        void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
{
    struct wined3d_shader *vs_shader;
    HRESULT result;

    TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
            device, desc, parent, parent_ops, shader);

    vs_shader = heap_alloc_zero(sizeof(*vs_shader));
    if (!vs_shader)
        return E_OUTOFMEMORY;

    result = vertex_shader_init(vs_shader, device, desc, parent, parent_ops);
    if (FAILED(result))
    {
        WARN("Failed to initialize vertex shader, hr %#x.\n", result);
        heap_free(vs_shader);
        return result;
    }

    wined3d_cs_init_object(device->cs, wined3d_shader_init_object, vs_shader);

    TRACE("Created vertex shader %p.\n", vs_shader);
    *shader = vs_shader;

    return WINED3D_OK;
}