Commit c3025582 authored by Alexandre Julliard

vkd3d: Import upstream release 1.5.

parent c738be12
......@@ -16,6 +16,7 @@ Isabella Bosia
Jactry Zeng
Joshua Ashton
Józef Kucia
Martin Storsjö
Matteo Bruni
Nikolay Sivov
Philip Rebohle
......
#define VKD3D_VCS_ID " (git 9d4df5e70468)"
#define VKD3D_VCS_ID " (git 56b2f56b8631)"
......@@ -61,6 +61,9 @@ enum vkd3d_api_version
VKD3D_API_VERSION_1_2,
VKD3D_API_VERSION_1_3,
VKD3D_API_VERSION_1_4,
VKD3D_API_VERSION_1_5,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_API_VERSION),
};
typedef HRESULT (*PFN_vkd3d_signal_event)(HANDLE event);
......
......@@ -46,6 +46,9 @@ enum vkd3d_shader_api_version
VKD3D_SHADER_API_VERSION_1_2,
VKD3D_SHADER_API_VERSION_1_3,
VKD3D_SHADER_API_VERSION_1_4,
VKD3D_SHADER_API_VERSION_1_5,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_API_VERSION),
};
/** The type of a chained structure. */
......@@ -99,6 +102,23 @@ enum vkd3d_shader_compile_option_buffer_uav
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_BUFFER_UAV),
};
/**
* Determines how typed UAVs are declared.
* \since 1.5
*/
enum vkd3d_shader_compile_option_typed_uav
{
/** Use R32(u)i/R32f format for UAVs which are read from. This is the default value. */
VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV_READ_FORMAT_R32 = 0x00000000,
/**
* Use Unknown format for UAVs which are read from. This should only be set if
* shaderStorageImageReadWithoutFormat is enabled in the target environment.
*/
VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV_READ_FORMAT_UNKNOWN = 0x00000001,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV),
};
enum vkd3d_shader_compile_option_formatting_flags
{
VKD3D_SHADER_COMPILE_OPTION_FORMATTING_NONE = 0x00000000,
......@@ -127,6 +147,8 @@ enum vkd3d_shader_compile_option_name
VKD3D_SHADER_COMPILE_OPTION_FORMATTING = 0x00000003,
/** \a value is a member of enum vkd3d_shader_api_version. \since 1.3 */
VKD3D_SHADER_COMPILE_OPTION_API_VERSION = 0x00000004,
/** \a value is a member of enum vkd3d_shader_compile_option_typed_uav. \since 1.5 */
VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV = 0x00000005,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME),
};
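An editorial sketch (not part of the commit) of how a client selects the new TYPED_UAV option, assuming the caller has already confirmed shaderStorageImageReadWithoutFormat in the target environment; the d3d12 pipeline code later in this commit builds the same array via typed_uav_compile_option():
static const struct vkd3d_shader_compile_option options[] =
{
    {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_5},
    {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV_READ_FORMAT_UNKNOWN},
};
/* Then point the compile info at it:
 * compile_info.options = options;
 * compile_info.option_count = ARRAY_SIZE(options); */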
......
......@@ -126,7 +126,11 @@ struct hlsl_type
unsigned int dimy;
union
{
struct list *elements;
struct
{
struct hlsl_struct_field *fields;
size_t field_count;
} record;
struct
{
struct hlsl_type *type;
......@@ -147,7 +151,6 @@ struct hlsl_semantic
struct hlsl_struct_field
{
struct list entry;
struct vkd3d_shader_location loc;
struct hlsl_type *type;
const char *name;
......@@ -227,6 +230,8 @@ struct hlsl_src
#define HLSL_MODIFIERS_MAJORITY_MASK (HLSL_MODIFIER_ROW_MAJOR | HLSL_MODIFIER_COLUMN_MAJOR)
#define HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT 0
struct hlsl_reg_reservation
{
char type;
......@@ -371,6 +376,10 @@ struct hlsl_ir_swizzle
struct hlsl_deref
{
struct hlsl_ir_var *var;
unsigned int path_len;
struct hlsl_src *path;
struct hlsl_src offset;
};
......@@ -384,6 +393,7 @@ enum hlsl_resource_load_type
{
HLSL_RESOURCE_LOAD,
HLSL_RESOURCE_SAMPLE,
HLSL_RESOURCE_SAMPLE_LOD,
HLSL_RESOURCE_GATHER_RED,
HLSL_RESOURCE_GATHER_GREEN,
HLSL_RESOURCE_GATHER_BLUE,
......@@ -395,8 +405,7 @@ struct hlsl_ir_resource_load
struct hlsl_ir_node node;
enum hlsl_resource_load_type load_type;
struct hlsl_deref resource, sampler;
struct hlsl_src coords;
struct hlsl_src texel_offset;
struct hlsl_src coords, lod, texel_offset;
};
struct hlsl_ir_store
......@@ -714,6 +723,9 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl
int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out);
bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, struct hlsl_deref *other);
void hlsl_cleanup_deref(struct hlsl_deref *deref);
void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new);
void hlsl_free_instr(struct hlsl_ir_node *node);
......@@ -726,6 +738,9 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha
struct hlsl_type *hlsl_get_type(struct hlsl_scope *scope, const char *name, bool recursive);
struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name);
struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, const struct hlsl_type *type,
struct hlsl_ir_node *idx);
struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, unsigned int array_size);
struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1,
struct hlsl_ir_node *arg2);
......@@ -742,17 +757,32 @@ struct hlsl_ir_if *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condit
struct hlsl_ir_constant *hlsl_new_int_constant(struct hlsl_ctx *ctx, int n,
const struct vkd3d_shader_location *loc);
struct hlsl_ir_jump *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct vkd3d_shader_location loc);
struct hlsl_ir_load *hlsl_new_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct hlsl_ir_node *offset,
struct hlsl_type *type, struct vkd3d_shader_location loc);
struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc);
struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, struct hlsl_type *data_type,
enum hlsl_resource_load_type type, struct hlsl_ir_var *resource, struct hlsl_ir_node *resource_offset,
struct hlsl_ir_var *sampler, struct hlsl_ir_node *sampler_offset, struct hlsl_ir_node *coords,
struct hlsl_ir_node *texel_offset, const struct vkd3d_shader_location *loc);
void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var);
struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var,
struct vkd3d_shader_location loc);
struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc);
struct hlsl_ir_load *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block,
const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc);
struct hlsl_ir_store *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs);
struct hlsl_ir_store *hlsl_new_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct hlsl_ir_node *offset,
struct hlsl_ir_node *rhs, unsigned int writemask, struct vkd3d_shader_location loc);
struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct list *fields);
struct hlsl_ir_store *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs,
struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, unsigned int writemask, const struct vkd3d_shader_location *loc);
struct hlsl_ir_store *hlsl_new_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block,
const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs);
struct hlsl_ir_resource_load *hlsl_new_resource_load(struct hlsl_ctx *ctx, struct hlsl_type *data_type,
enum hlsl_resource_load_type type, struct hlsl_deref *resource, struct hlsl_deref *sampler,
struct hlsl_ir_node *coords, struct hlsl_ir_node *texel_offset, const struct vkd3d_shader_location *loc);
struct hlsl_ir_resource_load *hlsl_new_sample_lod(struct hlsl_ctx *ctx, struct hlsl_type *data_type,
struct hlsl_deref *resource, struct hlsl_deref *sampler, struct hlsl_ir_node *coords,
struct hlsl_ir_node *texel_offset, struct hlsl_ir_node *lod, const struct vkd3d_shader_location *loc);
struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_location loc);
struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name,
struct hlsl_struct_field *fields, size_t field_count);
struct hlsl_ir_swizzle *hlsl_new_swizzle(struct hlsl_ctx *ctx, DWORD s, unsigned int components,
struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc);
struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type,
......@@ -765,8 +795,6 @@ struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr
struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type,
const struct vkd3d_shader_location loc, const struct hlsl_semantic *semantic, unsigned int modifiers,
const struct hlsl_reg_reservation *reg_reservation);
struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var,
const struct vkd3d_shader_location loc);
void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc,
enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5);
......@@ -784,10 +812,14 @@ bool hlsl_scope_add_type(struct hlsl_scope *scope, struct hlsl_type *type);
struct hlsl_type *hlsl_type_clone(struct hlsl_ctx *ctx, struct hlsl_type *old,
unsigned int default_majority, unsigned int modifiers);
unsigned int hlsl_type_component_count(struct hlsl_type *type);
unsigned int hlsl_type_component_count(const struct hlsl_type *type);
unsigned int hlsl_type_get_array_element_reg_size(const struct hlsl_type *type);
unsigned int hlsl_compute_component_offset(struct hlsl_ctx *ctx, struct hlsl_type *type,
unsigned int idx, struct hlsl_type **comp_type);
struct hlsl_type *hlsl_type_get_component_type(struct hlsl_ctx *ctx, struct hlsl_type *type,
unsigned int index);
bool hlsl_type_is_row_major(const struct hlsl_type *type);
unsigned int hlsl_type_minor_size(const struct hlsl_type *type);
unsigned int hlsl_type_major_size(const struct hlsl_type *type);
unsigned int hlsl_type_element_count(const struct hlsl_type *type);
unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset);
bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2);
......@@ -796,11 +828,14 @@ unsigned int hlsl_combine_writemasks(unsigned int first, unsigned int second);
unsigned int hlsl_map_swizzle(unsigned int swizzle, unsigned int writemask);
unsigned int hlsl_swizzle_from_writemask(unsigned int writemask);
bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
unsigned int *start, unsigned int *count);
bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset);
unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref);
struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref);
bool hlsl_fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context);
bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context);
bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context);
bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg);
......
......@@ -369,7 +369,143 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
return true;
}
bool hlsl_fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2)
{
enum hlsl_base_type type = dst->node.data_type->base_type;
unsigned int k;
assert(type == src1->node.data_type->base_type);
assert(type == src2->node.data_type->base_type);
for (k = 0; k < dst->node.data_type->dimx; ++k)
{
switch (type)
{
case HLSL_TYPE_INT:
dst->value[k].i = max(src1->value[k].i, src2->value[k].i);
break;
case HLSL_TYPE_UINT:
dst->value[k].u = max(src1->value[k].u, src2->value[k].u);
break;
default:
FIXME("Fold max for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type));
return false;
}
}
return true;
}
static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2)
{
enum hlsl_base_type type = dst->node.data_type->base_type;
unsigned int k;
assert(type == src1->node.data_type->base_type);
assert(type == src2->node.data_type->base_type);
for (k = 0; k < dst->node.data_type->dimx; ++k)
{
switch (type)
{
case HLSL_TYPE_INT:
dst->value[k].i = min(src1->value[k].i, src2->value[k].i);
break;
case HLSL_TYPE_UINT:
dst->value[k].u = min(src1->value[k].u, src2->value[k].u);
break;
default:
FIXME("Fold min for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type));
return false;
}
}
return true;
}
static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2)
{
enum hlsl_base_type type = dst->node.data_type->base_type;
unsigned int k;
assert(type == src1->node.data_type->base_type);
assert(type == src2->node.data_type->base_type);
for (k = 0; k < dst->node.data_type->dimx; ++k)
{
switch (type)
{
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
dst->value[k].u = src1->value[k].u ^ src2->value[k].u;
break;
default:
FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type));
return false;
}
}
return true;
}
static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2)
{
enum hlsl_base_type type = dst->node.data_type->base_type;
unsigned int k;
assert(type == src1->node.data_type->base_type);
assert(type == src2->node.data_type->base_type);
for (k = 0; k < dst->node.data_type->dimx; ++k)
{
switch (type)
{
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
dst->value[k].u = src1->value[k].u & src2->value[k].u;
break;
default:
FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type));
return false;
}
}
return true;
}
static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_ir_constant *dst,
struct hlsl_ir_constant *src1, struct hlsl_ir_constant *src2)
{
enum hlsl_base_type type = dst->node.data_type->base_type;
unsigned int k;
assert(type == src1->node.data_type->base_type);
assert(type == src2->node.data_type->base_type);
for (k = 0; k < dst->node.data_type->dimx; ++k)
{
switch (type)
{
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
dst->value[k].u = src1->value[k].u | src2->value[k].u;
break;
default:
FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst->node.data_type));
return false;
}
}
return true;
}
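A brief editorial note: each of these folders operates componentwise on constant operands and returns false for base types it does not handle, leaving the expression unfolded. A worked example under the code shown above:
/* Folding min(int2(3, 5), int2(4, 2)):
 * fold_min() writes min(3, 4) = 3 into dst->value[0].i
 * and min(5, 2) = 2 into dst->value[1].i,
 * so the constant result is int2(3, 2). */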
bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
struct hlsl_ir_constant *arg1, *arg2 = NULL, *res;
struct hlsl_ir_expr *expr;
......@@ -430,6 +566,26 @@ bool hlsl_fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void
success = fold_mod(ctx, res, arg1, arg2);
break;
case HLSL_OP2_MAX:
success = fold_max(ctx, res, arg1, arg2);
break;
case HLSL_OP2_MIN:
success = fold_min(ctx, res, arg1, arg2);
break;
case HLSL_OP2_BIT_XOR:
success = fold_bit_xor(ctx, res, arg1, arg2);
break;
case HLSL_OP2_BIT_AND:
success = fold_bit_and(ctx, res, arg1, arg2);
break;
case HLSL_OP2_BIT_OR:
success = fold_bit_or(ctx, res, arg1, arg2);
break;
default:
FIXME("Fold \"%s\" expression.\n", debug_hlsl_expr_op(expr->op));
success = false;
......@@ -447,3 +603,32 @@ bool hlsl_fold_constants(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void
}
return success;
}
bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
struct hlsl_ir_constant *value, *res;
struct hlsl_ir_swizzle *swizzle;
unsigned int i, swizzle_bits;
if (instr->type != HLSL_IR_SWIZZLE)
return false;
swizzle = hlsl_ir_swizzle(instr);
if (swizzle->val.node->type != HLSL_IR_CONSTANT)
return false;
value = hlsl_ir_constant(swizzle->val.node);
if (!(res = hlsl_alloc(ctx, sizeof(*res))))
return false;
init_node(&res->node, HLSL_IR_CONSTANT, instr->data_type, instr->loc);
swizzle_bits = swizzle->swizzle;
for (i = 0; i < swizzle->node.data_type->dimx; ++i)
{
res->value[i] = value->value[swizzle_bits & 3];
swizzle_bits >>= 2;
}
list_add_before(&swizzle->node.entry, &res->node.entry);
hlsl_replace_node(&swizzle->node, &res->node);
return true;
}
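An editorial note on the decoding loop above: an hlsl_ir_swizzle packs one source-component index (X=0 through W=3) into each pair of bits, lowest pair first, which is why the loop masks with 3 and shifts right by 2 per destination component. For example:
/* A .wzyx swizzle of a 4-component constant decodes as:
 * bits 0-1 = 3 (W), bits 2-3 = 2 (Z), bits 4-5 = 1 (Y), bits 6-7 = 0 (X),
 * giving res->value[0] = value->value[3], ..., res->value[3] = value->value[0]. */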
......@@ -243,28 +243,33 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_typ
{
const struct hlsl_type *array_type = get_array_type(type);
unsigned int array_size = get_array_size(type);
struct hlsl_struct_field *field;
unsigned int field_count = 0;
size_t fields_offset = 0;
size_t i;
if (type->bytecode_offset)
return;
if (array_type->type == HLSL_CLASS_STRUCT)
{
LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry)
field_count = array_type->e.record.field_count;
for (i = 0; i < field_count; ++i)
{
struct hlsl_struct_field *field = &array_type->e.record.fields[i];
field->name_bytecode_offset = put_string(buffer, field->name);
write_sm1_type(buffer, field->type, ctab_start);
}
fields_offset = bytecode_get_size(buffer) - ctab_start;
LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry)
for (i = 0; i < field_count; ++i)
{
struct hlsl_struct_field *field = &array_type->e.record.fields[i];
put_u32(buffer, field->name_bytecode_offset - ctab_start);
put_u32(buffer, field->type->bytecode_offset - ctab_start);
++field_count;
}
}
......
......@@ -355,7 +355,7 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b
const struct hlsl_profile_info *profile = ctx->profile;
unsigned int field_count = 0, array_size = 0;
size_t fields_offset = 0, name_offset = 0;
struct hlsl_struct_field *field;
size_t i;
if (type->bytecode_offset)
return;
......@@ -368,20 +368,25 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b
if (array_type->type == HLSL_CLASS_STRUCT)
{
LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry)
field_count = array_type->e.record.field_count;
for (i = 0; i < field_count; ++i)
{
struct hlsl_struct_field *field = &array_type->e.record.fields[i];
field->name_bytecode_offset = put_string(buffer, field->name);
write_sm4_type(ctx, buffer, field->type);
}
fields_offset = bytecode_get_size(buffer);
LIST_FOR_EACH_ENTRY(field, array_type->e.elements, struct hlsl_struct_field, entry)
for (i = 0; i < field_count; ++i)
{
struct hlsl_struct_field *field = &array_type->e.record.fields[i];
put_u32(buffer, field->name_bytecode_offset);
put_u32(buffer, field->type->bytecode_offset);
put_u32(buffer, field->reg_offset);
++field_count;
}
}
......@@ -1314,6 +1319,25 @@ static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d
write_sm4_instruction(buffer, &instr);
}
/* dp# instructions don't map the swizzle. */
static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode,
const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2)
{
struct sm4_instruction instr;
memset(&instr, 0, sizeof(instr));
instr.opcode = opcode;
sm4_dst_from_node(&instr.dsts[0], dst);
instr.dst_count = 1;
sm4_src_from_node(&instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL);
sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL);
instr.src_count = 2;
write_sm4_instruction(buffer, &instr);
}
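A brief editorial note on the comment above: write_sm4_binary_op maps each source swizzle through the destination writemask, which would be wrong for dot products, since dp2/dp3/dp4 read a fixed number of source components while writing a scalar. Hence the sources here use VKD3DSP_WRITEMASK_ALL. An illustrative disassembly line (hypothetical registers):
/* dp3 r0.x, r1.xyzx, r2.xyzx
 * -- all three source components are read even though only r0.x is written. */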
static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer,
enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx,
const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2)
......@@ -1440,9 +1464,36 @@ static bool type_is_float(const struct hlsl_type *type)
return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF;
}
static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx,
struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr,
const struct hlsl_ir_node *arg, uint32_t mask)
{
struct sm4_instruction instr;
memset(&instr, 0, sizeof(instr));
instr.opcode = VKD3D_SM4_OP_AND;
sm4_dst_from_node(&instr.dsts[0], &expr->node);
instr.dst_count = 1;
sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask);
instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE;
instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST;
instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR;
instr.srcs[1].reg.immconst_uint[0] = mask;
instr.src_count = 2;
write_sm4_instruction(buffer, &instr);
}
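An editorial note on the trick above: SM4 represents true as 0xffffffff and false as 0, so a bitwise AND with the bit pattern of "one" in the destination type performs the cast directly, as the callers below exploit by passing one.u for floats and 1 for integers:
/* bool -> float: ~0u & 0x3f800000 (the bits of 1.0f) = 0x3f800000, i.e. 1.0f;
 *                 0u & 0x3f800000 = 0, i.e. 0.0f.
 * bool -> int/uint: ~0u & 1 = 1; 0u & 1 = 0. */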
static void write_sm4_cast(struct hlsl_ctx *ctx,
struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr)
{
static const union
{
uint32_t u;
float f;
} one = { .f = 1.0 };
const struct hlsl_ir_node *arg1 = expr->operands[0].node;
const struct hlsl_type *dst_type = expr->node.data_type;
const struct hlsl_type *src_type = arg1->data_type;
......@@ -1469,7 +1520,7 @@ static void write_sm4_cast(struct hlsl_ctx *ctx,
break;
case HLSL_TYPE_BOOL:
hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from bool to float.");
write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u);
break;
case HLSL_TYPE_DOUBLE:
......@@ -1495,7 +1546,7 @@ static void write_sm4_cast(struct hlsl_ctx *ctx,
break;
case HLSL_TYPE_BOOL:
hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from bool to int.");
write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1);
break;
case HLSL_TYPE_DOUBLE:
......@@ -1521,7 +1572,7 @@ static void write_sm4_cast(struct hlsl_ctx *ctx,
break;
case HLSL_TYPE_BOOL:
hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from bool to uint.");
write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1);
break;
case HLSL_TYPE_DOUBLE:
......@@ -1602,6 +1653,11 @@ static void write_sm4_expr(struct hlsl_ctx *ctx,
write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0);
break;
case HLSL_OP1_LOGIC_NOT:
assert(dst_type->base_type == HLSL_TYPE_BOOL);
write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0);
break;
case HLSL_OP1_NEG:
switch (dst_type->base_type)
{
......@@ -1679,6 +1735,38 @@ static void write_sm4_expr(struct hlsl_ctx *ctx,
}
break;
case HLSL_OP2_DOT:
switch (dst_type->base_type)
{
case HLSL_TYPE_FLOAT:
switch (arg1->data_type->dimx)
{
case 4:
write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2);
break;
case 3:
write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2);
break;
case 2:
write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2);
break;
case 1:
assert(0);
break;
default:
assert(0);
}
break;
default:
hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer);
}
break;
case HLSL_OP2_EQUAL:
{
const struct hlsl_type *src_type = arg1->data_type;
......@@ -1763,6 +1851,16 @@ static void write_sm4_expr(struct hlsl_ctx *ctx,
break;
}
case HLSL_OP2_LOGIC_AND:
assert(dst_type->base_type == HLSL_TYPE_BOOL);
write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2);
break;
case HLSL_OP2_LOGIC_OR:
assert(dst_type->base_type == HLSL_TYPE_BOOL);
write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2);
break;
case HLSL_OP2_LSHIFT:
assert(type_is_integer(dst_type));
assert(dst_type->base_type != HLSL_TYPE_BOOL);
......@@ -1984,11 +2082,23 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx,
const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
const struct hlsl_ir_node *coords = load->coords.node;
if (resource_type->type != HLSL_CLASS_OBJECT)
{
assert(resource_type->type == HLSL_CLASS_ARRAY || resource_type->type == HLSL_CLASS_STRUCT);
hlsl_fixme(ctx, &load->node.loc, "Resource being a component of another variable.");
return;
}
if (load->sampler.var)
{
const struct hlsl_type *sampler_type = load->sampler.var->data_type;
assert(sampler_type->type == HLSL_CLASS_OBJECT);
if (sampler_type->type != HLSL_CLASS_OBJECT)
{
assert(sampler_type->type == HLSL_CLASS_ARRAY || sampler_type->type == HLSL_CLASS_STRUCT);
hlsl_fixme(ctx, &load->node.loc, "Sampler being a component of another variable.");
return;
}
assert(sampler_type->base_type == HLSL_TYPE_SAMPLER);
assert(sampler_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC);
......@@ -2037,6 +2147,10 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx,
write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource,
&load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset);
break;
case HLSL_RESOURCE_SAMPLE_LOD:
hlsl_fixme(ctx, &load->node.loc, "SM4 sample-LOD expression.");
break;
}
}
......
......@@ -206,7 +206,7 @@ static enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval(enum vkd3d_
#define VKD3D_SPIRV_VERSION 0x00010000
#define VKD3D_SPIRV_GENERATOR_ID 18
#define VKD3D_SPIRV_GENERATOR_VERSION 4
#define VKD3D_SPIRV_GENERATOR_VERSION 5
#define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID)
struct vkd3d_spirv_stream
......@@ -2228,6 +2228,7 @@ struct vkd3d_dxbc_compiler
bool strip_debug;
bool ssbo_uavs;
bool uav_read_without_format;
struct rb_tree symbol_table;
uint32_t temp_id;
......@@ -2379,6 +2380,15 @@ struct vkd3d_dxbc_compiler *vkd3d_dxbc_compiler_create(const struct vkd3d_shader
case VKD3D_SHADER_COMPILE_OPTION_API_VERSION:
break;
case VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV:
if (option->value == VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV_READ_FORMAT_R32)
compiler->uav_read_without_format = false;
else if (option->value == VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV_READ_FORMAT_UNKNOWN)
compiler->uav_read_without_format = true;
else
WARN("Ignoring unrecognised value %#x for option %#x.\n", option->value, option->name);
break;
}
}
......@@ -5856,14 +5866,18 @@ static uint32_t vkd3d_dxbc_compiler_get_image_type_id(struct vkd3d_dxbc_compiler
const struct vkd3d_shader_descriptor_info *d;
uint32_t sampled_type_id;
SpvImageFormat format;
bool uav_read;
format = SpvImageFormatUnknown;
if (reg->type == VKD3DSPR_UAV)
{
d = vkd3d_dxbc_compiler_get_descriptor_info(compiler,
VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range);
if (raw_structured || (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ))
uav_read = !!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ);
if (raw_structured || (uav_read && !compiler->uav_read_without_format))
format = image_format_for_image_read(data_type);
else if (uav_read)
vkd3d_spirv_enable_capability(builder, SpvCapabilityStorageImageReadWithoutFormat);
}
sampled_type_id = vkd3d_spirv_get_type_id(builder, data_type, 1);
......@@ -5962,7 +5976,7 @@ static void vkd3d_dxbc_compiler_emit_resource_declaration(struct vkd3d_dxbc_comp
const struct vkd3d_shader_resource *resource, enum vkd3d_shader_resource_type resource_type,
enum vkd3d_data_type resource_data_type, unsigned int structure_stride, bool raw)
{
struct vkd3d_descriptor_variable_info var_info, counter_var_info;
struct vkd3d_descriptor_variable_info var_info, counter_var_info = {0};
struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
SpvStorageClass storage_class = SpvStorageClassUniformConstant;
uint32_t counter_type_id, type_id, var_id, counter_var_id = 0;
......
......@@ -118,6 +118,7 @@ enum vkd3d_shader_error
VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS = 5019,
VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE = 5020,
VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO = 5021,
VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF = 5022,
VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300,
VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301,
......
......@@ -369,7 +369,10 @@ static void *vkd3d_fence_worker_main(void *arg)
}
if (worker->should_exit)
{
vkd3d_mutex_unlock(&worker->mutex);
break;
}
old_fences_size = cur_fences_size;
old_fences = cur_fences;
......@@ -424,20 +427,11 @@ static HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker,
return hresult_from_errno(rc);
}
if ((rc = vkd3d_cond_init(&worker->fence_destruction_cond)))
{
ERR("Failed to initialize condition variable, error %d.\n", rc);
vkd3d_mutex_destroy(&worker->mutex);
vkd3d_cond_destroy(&worker->cond);
return hresult_from_errno(rc);
}
if (FAILED(hr = vkd3d_create_thread(device->vkd3d_instance,
vkd3d_fence_worker_main, worker, &worker->thread)))
{
vkd3d_mutex_destroy(&worker->mutex);
vkd3d_cond_destroy(&worker->cond);
vkd3d_cond_destroy(&worker->fence_destruction_cond);
}
return hr;
......@@ -467,7 +461,6 @@ static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker,
vkd3d_mutex_destroy(&worker->mutex);
vkd3d_cond_destroy(&worker->cond);
vkd3d_cond_destroy(&worker->fence_destruction_cond);
vkd3d_free(worker->fences);
......@@ -858,7 +851,7 @@ static void d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence)
}
else
{
current->latch = true;
*current->latch = true;
signal_null_event_cond = true;
}
}
......@@ -1162,7 +1155,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *i
{
struct d3d12_fence *fence = impl_from_ID3D12Fence(iface);
unsigned int i;
bool *latch;
bool latch = false;
int rc;
TRACE("iface %p, value %#"PRIx64", event %p.\n", iface, value, event);
......@@ -1203,8 +1196,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *i
fence->events[fence->event_count].value = value;
fence->events[fence->event_count].event = event;
fence->events[fence->event_count].latch = false;
latch = &fence->events[fence->event_count].latch;
fence->events[fence->event_count].latch = &latch;
++fence->event_count;
/* If event is NULL, we need to block until the fence value completes.
......@@ -1213,7 +1205,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *i
* and signal a condition variable instead of calling external signal_event callback. */
if (!event)
{
while (!*latch)
while (!latch)
vkd3d_cond_wait(&fence->null_event_cond, &fence->mutex);
}
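An editorial reading of this fix (not commit text): the latch previously lived inside the fence->events array, whose storage can move when the array grows while a NULL-event waiter is still blocked on it. Storing a pointer to the waiter's stack-local flag keeps the location stable for the lifetime of the wait:
/* Waiter (d3d12_fence_SetEventOnCompletion, event == NULL):
 *     bool latch = false;                              // stack storage, stable address
 *     fence->events[fence->event_count].latch = &latch; // signaler writes through this
 *     while (!latch)
 *         vkd3d_cond_wait(&fence->null_event_cond, &fence->mutex);
 * Signaler (d3d12_fence_signal_external_events_locked):
 *     *current->latch = true;                          // wakes the waiter via the cond */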
......@@ -6804,22 +6796,15 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if
goto done;
}
vkd3d_mutex_unlock(&fence->mutex);
/* This is the critical part required to support out-of-order signal.
* Normally we would be able to submit waits and signals out of order, but
* we don't have virtualized queues in Vulkan, so we need to handle the case
* where multiple queues alias over the same physical queue, so effectively,
* we need to manage out-of-order submits ourselves. */
if (!command_queue->ops_count)
hr = d3d12_device_add_blocked_command_queues(command_queue->device, &command_queue, 1);
if (FAILED(hr))
goto done;
if (!(op = d3d12_command_queue_require_space_locked(command_queue)))
{
vkd3d_mutex_unlock(&fence->mutex);
hr = E_OUTOFMEMORY;
goto done;
}
......@@ -6829,6 +6814,16 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if
d3d12_fence_incref(fence);
/* Add the queue to the blocked list after writing the op to ensure the queue isn't
* removed again in another thread because it has no ops. */
if (command_queue->ops_count == 1)
hr = d3d12_device_add_blocked_command_queues(command_queue->device, &command_queue, 1);
/* The fence must remain locked until the op is created and the queue is added to the blocked list,
* because if an unblocking d3d12_fence_Signal() call occurs on another thread before the above
* work is done, flushing will be delayed until the next signal, if one occurs at all. */
vkd3d_mutex_unlock(&fence->mutex);
done:
vkd3d_mutex_unlock(&command_queue->op_mutex);
return hr;
......
......@@ -1391,6 +1391,45 @@ static void vkd3d_device_vk_heaps_descriptor_limits_init(struct vkd3d_device_des
limits->sampler_max_descriptors = min(limits->sampler_max_descriptors, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS);
}
static bool d3d12_device_supports_typed_uav_load_additional_formats(const struct d3d12_device *device)
{
const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs;
const struct vkd3d_format *format;
VkFormatProperties properties;
unsigned int i;
static const DXGI_FORMAT additional_formats[] =
{
DXGI_FORMAT_R32G32B32A32_FLOAT,
DXGI_FORMAT_R32G32B32A32_UINT,
DXGI_FORMAT_R32G32B32A32_SINT,
DXGI_FORMAT_R16G16B16A16_FLOAT,
DXGI_FORMAT_R16G16B16A16_UINT,
DXGI_FORMAT_R16G16B16A16_SINT,
DXGI_FORMAT_R8G8B8A8_UNORM,
DXGI_FORMAT_R8G8B8A8_UINT,
DXGI_FORMAT_R8G8B8A8_SINT,
DXGI_FORMAT_R16_FLOAT,
DXGI_FORMAT_R16_UINT,
DXGI_FORMAT_R16_SINT,
DXGI_FORMAT_R8_UNORM,
DXGI_FORMAT_R8_UINT,
DXGI_FORMAT_R8_SINT,
};
for (i = 0; i < ARRAY_SIZE(additional_formats); ++i)
{
format = vkd3d_get_format(device, additional_formats[i], false);
assert(format);
VK_CALL(vkGetPhysicalDeviceFormatProperties(device->vk_physical_device, format->vk_format, &properties));
if (!((properties.linearTilingFeatures | properties.optimalTilingFeatures) & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT))
return false;
}
return true;
}
static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
const struct vkd3d_device_create_info *create_info,
struct vkd3d_physical_device_info *physical_device_info,
......@@ -1425,6 +1464,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties;
vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect;
vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries;
vulkan_info->uav_read_without_format = features->shaderStorageImageReadWithoutFormat;
vulkan_info->max_vertex_attrib_divisor = max(physical_device_info->vertex_divisor_properties.maxVertexAttribDivisor, 1);
device->feature_options.DoublePrecisionFloatShaderOps = features->shaderFloat64;
......@@ -1455,7 +1495,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
else
device->feature_options.ResourceBindingTier = D3D12_RESOURCE_BINDING_TIER_3;
device->feature_options.TypedUAVLoadAdditionalFormats = features->shaderStorageImageExtendedFormats;
device->feature_options.TypedUAVLoadAdditionalFormats = features->shaderStorageImageReadWithoutFormat
&& d3d12_device_supports_typed_uav_load_additional_formats(device);
/* GL_INTEL_fragment_shader_ordering, no Vulkan equivalent. */
device->feature_options.ROVsSupported = FALSE;
/* GL_INTEL_conservative_rasterization, no Vulkan equivalent. */
......
......@@ -1944,6 +1944,13 @@ struct d3d12_pipeline_state *unsafe_impl_from_ID3D12PipelineState(ID3D12Pipeline
return impl_from_ID3D12PipelineState(iface);
}
static inline unsigned int typed_uav_compile_option(const struct d3d12_device *device)
{
return device->vk_info.uav_read_without_format
? VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV_READ_FORMAT_UNKNOWN
: VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV_READ_FORMAT_R32;
}
static HRESULT create_shader_stage(struct d3d12_device *device,
struct VkPipelineShaderStageCreateInfo *stage_desc, enum VkShaderStageFlagBits stage,
const D3D12_SHADER_BYTECODE *code, const struct vkd3d_shader_interface_info *shader_interface)
......@@ -1955,9 +1962,10 @@ static HRESULT create_shader_stage(struct d3d12_device *device,
VkResult vr;
int ret;
static const struct vkd3d_shader_compile_option options[] =
const struct vkd3d_shader_compile_option options[] =
{
{VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_4},
{VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_5},
{VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)},
};
stage_desc->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
......@@ -2001,14 +2009,15 @@ static HRESULT create_shader_stage(struct d3d12_device *device,
return S_OK;
}
static int vkd3d_scan_dxbc(const D3D12_SHADER_BYTECODE *code,
static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER_BYTECODE *code,
struct vkd3d_shader_scan_descriptor_info *descriptor_info)
{
struct vkd3d_shader_compile_info compile_info;
static const struct vkd3d_shader_compile_option options[] =
const struct vkd3d_shader_compile_option options[] =
{
{VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_4},
{VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_5},
{VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)},
};
compile_info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO;
......@@ -2170,7 +2179,7 @@ static HRESULT d3d12_pipeline_state_find_and_init_uav_counters(struct d3d12_pipe
shader_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO;
shader_info.next = NULL;
if ((ret = vkd3d_scan_dxbc(code, &shader_info)) < 0)
if ((ret = vkd3d_scan_dxbc(device, code, &shader_info)) < 0)
{
WARN("Failed to scan shader bytecode, stage %#x, vkd3d result %d.\n", stage_flags, ret);
return hresult_from_vkd3d_result(ret);
......
......@@ -143,6 +143,8 @@ struct vkd3d_vulkan_info
bool rasterization_stream;
bool transform_feedback_queries;
bool uav_read_without_format;
bool vertex_attrib_zero_divisor;
unsigned int max_vertex_attrib_divisor;
......@@ -346,7 +348,6 @@ struct vkd3d_fence_worker
union vkd3d_thread_handle thread;
struct vkd3d_mutex mutex;
struct vkd3d_cond cond;
struct vkd3d_cond fence_destruction_cond;
bool should_exit;
size_t fence_count;
......@@ -529,7 +530,7 @@ struct d3d12_fence
{
uint64_t value;
HANDLE event;
bool latch;
bool *latch;
} *events;
size_t events_size;
size_t event_count;
......