/*
 * shaders implementation
 *
 * Copyright 2002-2003 Jason Edmeades
 * Copyright 2002-2003 Raphael Junqueira
 * Copyright 2004 Christian Costa
 * Copyright 2005 Oliver Stieber
 * Copyright 2006 Ivan Gyurdiev
 * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
 * Copyright 2009 Henri Verbeet for CodeWeavers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include "config.h"

#include <math.h>
30
#include <stdio.h>
31 32 33 34 35

#include "wined3d_private.h"

WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);

36
#define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
37

38 39
static void vshader_set_limits(IWineD3DVertexShaderImpl *This)
{
    /* Fill in This->baseShader.limits based on the shader version stored in
     * This->baseShader.reg_maps.shader_version. */
    IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) This->baseShader.device;
    DWORD version = WINED3D_SHADER_VERSION(This->baseShader.reg_maps.shader_version.major,
            This->baseShader.reg_maps.shader_version.minor);

    /* These limits do not depend on the shader version. */
    This->baseShader.limits.texcoord = 0;
    This->baseShader.limits.attributes = 16;
    This->baseShader.limits.packed_input = 0;
    This->baseShader.limits.address = 1;

    /* DX10 cards on Windows advertise a d3d9 constant limit of 256 even though
     * they are capable of supporting much more (GL drivers advertise 1024).
     * d3d9.dll and d3d8.dll clamp the wined3d-advertised maximum, so the float
     * constant limit is clamped to 256 for every version handled here.
     *
     * TODO: vs_1_1 has a minimum of 96 constants. What happens if a vs_1_1
     * shader is used on a vs_3_0 capable card that has 256 constants? */
    This->baseShader.limits.constant_float = min(256, device->d3d_vshader_constantF);

    switch (version)
    {
        case WINED3D_SHADER_VERSION(1,0):
        case WINED3D_SHADER_VERSION(1,1):
            This->baseShader.limits.temporary = 12;
            This->baseShader.limits.constant_bool = 0;
            This->baseShader.limits.constant_int = 0;
            This->baseShader.limits.packed_output = 0;
            This->baseShader.limits.sampler = 0;
            This->baseShader.limits.label = 0;
            break;

        case WINED3D_SHADER_VERSION(4,0):
            FIXME("Using 3.0 limits for 4.0 shader\n");
            /* Fall through */

        case WINED3D_SHADER_VERSION(3,0):
            This->baseShader.limits.temporary = 32;
            This->baseShader.limits.constant_bool = 32;
            This->baseShader.limits.constant_int = 32;
            This->baseShader.limits.packed_output = 12;
            This->baseShader.limits.sampler = 4;
            This->baseShader.limits.label = 16; /* FIXME: 2048 */
            break;

        default:
            /* Unknown versions are reported and then given the 2.x limits. */
            FIXME("Unrecognized vertex shader version %u.%u\n",
                    This->baseShader.reg_maps.shader_version.major,
                    This->baseShader.reg_maps.shader_version.minor);
            /* Fall through */

        case WINED3D_SHADER_VERSION(2,0):
        case WINED3D_SHADER_VERSION(2,1):
            This->baseShader.limits.temporary = 12;
            This->baseShader.limits.constant_bool = 16;
            This->baseShader.limits.constant_int = 16;
            This->baseShader.limits.packed_output = 0;
            This->baseShader.limits.sampler = 0;
            This->baseShader.limits.label = 16;
            break;
    }
}

110 111 112 113 114 115 116 117 118
static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
    /* Two declaration usages match when their indices are equal and the
     * usages are either identical or the POSITION / POSITIONT pair (in
     * either order). */
    BOOL position_pair;

    if (usage_idx1 != usage_idx2)
        return FALSE;

    position_pair = (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT)
            || (usage1 == WINED3DDECLUSAGE_POSITIONT && usage2 == WINED3DDECLUSAGE_POSITION);

    return (usage1 == usage2 || position_pair) ? TRUE : FALSE;
}

119 120 121 122 123
BOOL vshader_get_input(IWineD3DVertexShader* iface, BYTE usage_req, BYTE usage_idx_req, unsigned int *regnum)
{
    /* Look up the input register whose attribute matches the requested
     * usage / usage index.
     *
     * Only registers whose bit is set in reg_maps.input_registers are
     * considered. On a match the register index is stored in *regnum and TRUE
     * is returned; otherwise FALSE is returned and *regnum is left untouched. */
    IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
    WORD remaining = This->baseShader.reg_maps.input_registers;
    unsigned int reg = 0;

    /* Bit 0 of "remaining" always corresponds to register "reg"; the loop
     * stops as soon as no set bits are left. */
    while (remaining)
    {
        if ((remaining & 1)
                && match_usage(This->attributes[reg].usage,
                        This->attributes[reg].usage_idx, usage_req, usage_idx_req))
        {
            *regnum = reg;
            return TRUE;
        }

        remaining >>= 1;
        ++reg;
    }

    return FALSE;
}

139 140 141
/* *******************************************
   IWineD3DVertexShader IUnknown parts follow
   ******************************************* */
142
static HRESULT  WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
    /* Hand out iface (with an extra reference) for the IWineD3DVertexShader,
     * IWineD3DBaseShader, IWineD3DBase and IUnknown IIDs; any other IID
     * yields E_NOINTERFACE with *ppobj set to NULL. */
    TRACE("iface %p, riid %s, ppobj %p\n", iface, debugstr_guid(riid), ppobj);

    if (!IsEqualGUID(riid, &IID_IWineD3DVertexShader)
            && !IsEqualGUID(riid, &IID_IWineD3DBaseShader)
            && !IsEqualGUID(riid, &IID_IWineD3DBase)
            && !IsEqualGUID(riid, &IID_IUnknown))
    {
        WARN("%s not implemented, returning E_NOINTERFACE\n", debugstr_guid(riid));

        *ppobj = NULL;
        return E_NOINTERFACE;
    }

    IUnknown_AddRef(iface);
    *ppobj = iface;
    return S_OK;
}

161
static ULONG  WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
    /* Atomically increment the shader's reference count and return the new
     * value. */
    IWineD3DVertexShaderImpl *shader = (IWineD3DVertexShaderImpl *)iface;
    ULONG new_count;

    new_count = InterlockedIncrement(&shader->baseShader.ref);
    TRACE("%p increasing refcount to %u\n", shader, new_count);

    return new_count;
}

170
static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
    /* Atomically decrement the reference count and return the new value.
     * When it reaches zero the shader is cleaned up, the parent is notified
     * through parent_ops, and the object's memory is freed. */
    IWineD3DVertexShaderImpl *shader = (IWineD3DVertexShaderImpl *)iface;
    ULONG remaining = InterlockedDecrement(&shader->baseShader.ref);

    TRACE("%p decreasing refcount to %u\n", shader, remaining);

    if (remaining)
        return remaining;

    /* Last reference dropped: tear down in this order, freeing the object
     * itself last since the earlier steps still read from it. */
    shader_cleanup((IWineD3DBaseShader *)iface);
    shader->baseShader.parent_ops->wined3d_object_destroyed(shader->baseShader.parent);
    HeapFree(GetProcessHeap(), 0, shader);

    return 0;
}

/* *******************************************
   IWineD3DBase, IWineD3DBaseShader and IWineD3DVertexShader parts follow
   ******************************************* */

190
static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
    /* Store the shader's parent in *parent, taking a reference on it for the
     * caller. Always returns WINED3D_OK. */
    IWineD3DVertexShaderImpl *shader = (IWineD3DVertexShaderImpl *)iface;
    IUnknown *owner = shader->baseShader.parent;

    *parent = owner;
    IUnknown_AddRef(owner);
    TRACE("(%p) : returning %p\n", shader, owner);

    return WINED3D_OK;
}
198

199
static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
    /* Store the device this shader belongs to in *pDevice, taking a reference
     * on it for the caller. Always returns WINED3D_OK. */
    IWineD3DVertexShaderImpl *shader = (IWineD3DVertexShaderImpl *)iface;

    *pDevice = shader->baseShader.device;
    IWineD3DDevice_AddRef(*pDevice);
    TRACE("(%p) returning %p\n", shader, *pDevice);

    return WINED3D_OK;
}

207
static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
    /* Retrieve the stored shader byte code.
     *
     * With pData == NULL only the required size is written to *pSizeOfData.
     * Otherwise *pSizeOfData is the size of the caller's buffer: when it is
     * large enough the byte code is copied into pData, when it is too small
     * WINED3DERR_INVALIDCALL is returned and nothing is written. */
    IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;

    TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);

    if (pData)
    {
        if (*pSizeOfData < This->baseShader.functionLength)
        {
            /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
             * than the required size we should write the required size and
             * return D3DERR_MOREDATA. That's not actually true. */
            return WINED3DERR_INVALIDCALL;
        }

        TRACE("(%p) : GetFunction copying to %p\n", This, pData);
        memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
    }
    else
    {
        /* Size query only. */
        *pSizeOfData = This->baseShader.functionLength;
    }

    return WINED3D_OK;
}

228 229
static HRESULT vertexshader_set_function(IWineD3DVertexShaderImpl *shader,
        const DWORD *byte_code, const struct wined3d_shader_signature *output_signature)
{
    /* Attach byte_code to the shader: select and initialise a frontend,
     * collect register usage, apply the optional output signature, set the
     * version limits, choose the relative addressing offset where the ARB
     * offset-limit quirk applies, and finally keep a private copy of the
     * byte code.
     *
     * Returns WINED3D_OK on success, WINED3DERR_INVALIDCALL when no frontend
     * can be selected or initialised, the error from
     * shader_get_registers_used() if that fails, or E_OUTOFMEMORY when the
     * copy of the byte code cannot be allocated. */
    IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)shader->baseShader.device;
    const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
    shader_reg_maps *reg_maps = &shader->baseShader.reg_maps;
    const struct wined3d_shader_frontend *frontend;
    unsigned int idx;
    HRESULT hr;

    TRACE("shader %p, byte_code %p, output_signature %p.\n", shader, byte_code, output_signature);

    /* The first DWORD of the byte code is what the frontend is chosen from. */
    frontend = shader_select_frontend(*byte_code);
    if (!frontend)
    {
        FIXME("Unable to find frontend for shader.\n");
        return WINED3DERR_INVALIDCALL;
    }

    shader->baseShader.frontend = frontend;
    shader->baseShader.frontend_data = frontend->shader_init(byte_code, output_signature);
    if (!shader->baseShader.frontend_data)
    {
        FIXME("Failed to initialize frontend.\n");
        return WINED3DERR_INVALIDCALL;
    }

    /* First pass: trace shader */
    if (TRACE_ON(d3d_shader))
        shader_trace_init(frontend, shader->baseShader.frontend_data, byte_code);

    /* Initialize immediate constant lists */
    list_init(&shader->baseShader.constantsF);
    list_init(&shader->baseShader.constantsB);
    list_init(&shader->baseShader.constantsI);

    /* Second pass: figure out registers used, semantics, etc.
     * min_rel_offset starts at the float constant limit and max_rel_offset
     * at 0, i.e. min > max until this pass changes them (presumably when
     * relative addressing is encountered - the update is not visible here). */
    shader->min_rel_offset = device->d3d_vshader_constantF;
    shader->max_rel_offset = 0;
    hr = shader_get_registers_used((IWineD3DBaseShader *)shader, frontend,
            reg_maps, shader->attributes, NULL, shader->output_signature,
            byte_code, device->d3d_vshader_constantF);
    if (hr != WINED3D_OK)
        return hr;

    /* A caller-supplied output signature marks its registers as written and
     * is copied into the shader, indexed by register. */
    if (output_signature)
    {
        idx = 0;
        while (idx < output_signature->element_count)
        {
            struct wined3d_shader_signature_element *element = &output_signature->elements[idx];

            reg_maps->output_registers |= 1 << element->register_idx;
            shader->output_signature[element->register_idx] = *element;
            ++idx;
        }
    }

    vshader_set_limits(shader);

    /* With the ARB backend on GL implementations flagged with
     * WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT, pick a base offset for relative
     * addressing from the observed [min, max] range. */
    if (device->vs_selected_mode == SHADER_ARB
            && (gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT)
            && shader->min_rel_offset <= shader->max_rel_offset)
    {
        if (shader->max_rel_offset - shader->min_rel_offset <= 63)
        {
            /* Narrow range: base it at min when it extends past 63,
             * otherwise no offset is needed. */
            shader->rel_offset = shader->max_rel_offset > 63 ? shader->min_rel_offset : 0;
        }
        else if (shader->max_rel_offset - shader->min_rel_offset <= 127)
        {
            shader->rel_offset = shader->min_rel_offset + 63;
        }
        else
        {
            /* Range too wide; rel_offset is left as it was. */
            FIXME("The difference between the minimum and maximum relative offset is > 127\n");
            FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
            FIXME("Min: %d, Max: %d\n", shader->min_rel_offset, shader->max_rel_offset);
        }
    }

    shader->baseShader.load_local_constsF = reg_maps->usesrelconstF
            && !list_empty(&shader->baseShader.constantsF);

    /* copy the function ... because it will certainly be released by application */
    shader->baseShader.function = HeapAlloc(GetProcessHeap(), 0, shader->baseShader.functionLength);
    if (!shader->baseShader.function)
        return E_OUTOFMEMORY;
    memcpy(shader->baseShader.function, byte_code, shader->baseShader.functionLength);

    return WINED3D_OK;
}
315

316 317 318 319
/* Set local constants for d3d8 shaders.
 *
 * Adds one local_constant entry to the shader's constantsF list for every
 * constant index in [start_idx, start_idx + count), clamped to the device's
 * float constant limit. src_data supplies 4 floats per constant.
 *
 * Returns WINED3D_OK, or E_OUTOFMEMORY if an entry cannot be allocated
 * (entries added before the failure stay in the list). */
static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
        UINT start_idx, const float *src_data, UINT count) {
    IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
    IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) This->baseShader.device;
    UINT end_idx = start_idx + count;
    UINT const_idx;
    UINT src_pos = 0;

    TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);

    if (end_idx > device->d3d_vshader_constantF)
    {
        WARN("end_idx %u > float constants limit %u\n", end_idx, device->d3d_vshader_constantF);
        end_idx = device->d3d_vshader_constantF;
    }

    /* src_pos counts constants consumed from src_data, const_idx is the
     * destination constant index. */
    for (const_idx = start_idx; const_idx < end_idx; ++const_idx, ++src_pos)
    {
        local_constant *item = HeapAlloc(GetProcessHeap(), 0, sizeof(*item));

        if (!item)
            return E_OUTOFMEMORY;

        item->idx = const_idx;
        memcpy(item->value, src_data + src_pos * 4 /* 4 components */, 4 * sizeof(float));
        list_add_head(&This->baseShader.constantsF, &item->entry);
    }

    return WINED3D_OK;
}

344
static const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
345 346 347 348 349
{
    /*** IUnknown methods ***/
    IWineD3DVertexShaderImpl_QueryInterface,
    IWineD3DVertexShaderImpl_AddRef,
    IWineD3DVertexShaderImpl_Release,
350
    /*** IWineD3DBase methods ***/
351
    IWineD3DVertexShaderImpl_GetParent,
352
    /*** IWineD3DBaseShader methods ***/
353
    IWineD3DVertexShaderImpl_GetDevice,
354
    IWineD3DVertexShaderImpl_GetFunction,
355
    /*** IWineD3DVertexShader methods ***/
356
    IWIneD3DVertexShaderImpl_SetLocalConstantsF
357
};
358 359 360 361 362

void find_vs_compile_args(IWineD3DVertexShaderImpl *shader, IWineD3DStateBlockImpl *stateblock, struct vs_compile_args *args) {
    /* Fill in the vertex shader compile arguments:
     *  - fog_src is VS_FOG_COORD when the WINED3DRS_FOGTABLEMODE render state
     *    is WINED3DFOG_NONE, and VS_FOG_Z otherwise;
     *  - swizzle_map is taken from the device's strided stream info. */
    IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)shader->baseShader.device;

    if (stateblock->renderState[WINED3DRS_FOGTABLEMODE] == WINED3DFOG_NONE)
        args->fog_src = VS_FOG_COORD;
    else
        args->fog_src = VS_FOG_Z;

    args->swizzle_map = device->strided_streams.swizzle_map;
}
363 364 365

HRESULT vertexshader_init(IWineD3DVertexShaderImpl *shader, IWineD3DDeviceImpl *device,
        const DWORD *byte_code, const struct wined3d_shader_signature *output_signature,
366
        IUnknown *parent, const struct wined3d_parent_ops *parent_ops)
367 368 369 370 371 372
{
    HRESULT hr;

    if (!byte_code) return WINED3DERR_INVALIDCALL;

    shader->lpVtbl = &IWineD3DVertexShader_Vtbl;
373
    shader_init(&shader->baseShader, device, parent, parent_ops);
374

375
    hr = vertexshader_set_function(shader, byte_code, output_signature);
376 377 378 379 380 381 382 383 384
    if (FAILED(hr))
    {
        WARN("Failed to set function, hr %#x.\n", hr);
        shader_cleanup((IWineD3DBaseShader *)shader);
        return hr;
    }

    return WINED3D_OK;
}