Commit cb723c6d authored by Zebediah Figura, committed by Alexandre Julliard

wined3d: Submit command buffers after 512 draw or dispatch commands.

This improves performance for the game "Grounded", on an AMD Radeon RX 6700 XT, with radv from Mesa 22.3.6. Testing was done with the "cb_access_map_w" option enabled, which also improves performance with the game by itself. Grounded generally makes about 4000 draw calls per frame, which seems not atypical. This change makes it submit at most an extra 8 times per frame, but in practice due to WINED3D_PERIODIC_SUBMIT_MAX_BUFFERS it submits fewer times (usually only 2-3). The most demanding game I've seen made about 20,000 draw calls per frame, at which point the overhead of adapter_vk_draw_primitive() itself becomes a serious bottleneck. For such a game we would submit 40 more times per frame with these settings, although WINED3D_PERIODIC_SUBMIT_MAX_BUFFERS means we would likely submit fewer times than that. In any case, if submission itself becomes a bottleneck, we should offload it to a separate thread. Credit goes to Philip Rebohle and his work on DXVK for helping me to notice that periodic submission might make a difference.
parent e7db99d8
...@@ -1807,6 +1807,8 @@ static void adapter_vk_draw_primitive(struct wined3d_device *device, ...@@ -1807,6 +1807,8 @@ static void adapter_vk_draw_primitive(struct wined3d_device *device,
context_vk->c.transform_feedback_active = 0; context_vk->c.transform_feedback_active = 0;
} }
++context_vk->command_buffer_work_count;
context_release(&context_vk->c); context_release(&context_vk->c);
} }
...@@ -1851,6 +1853,8 @@ static void adapter_vk_dispatch_compute(struct wined3d_device *device, ...@@ -1851,6 +1853,8 @@ static void adapter_vk_dispatch_compute(struct wined3d_device *device,
VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_CALL(vkCmdPipelineBarrier(vk_command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, 0, NULL, 0, NULL, 0, NULL)); VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, 0, NULL, 0, NULL, 0, NULL));
++context_vk->command_buffer_work_count;
context_release(&context_vk->c); context_release(&context_vk->c);
} }
......
...@@ -1771,6 +1771,37 @@ void wined3d_context_vk_cleanup(struct wined3d_context_vk *context_vk) ...@@ -1771,6 +1771,37 @@ void wined3d_context_vk_cleanup(struct wined3d_context_vk *context_vk)
wined3d_context_cleanup(&context_vk->c); wined3d_context_cleanup(&context_vk->c);
} }
/* In general we only submit when necessary or when a frame ends. However,
* applications which do a lot of work per frame can end up with the GPU idle
* for long periods of time while the CPU is building commands, and drivers may
* choose to reclock the GPU to a lower power level if they detect it being idle
* for that long.
*
* This may also help performance simply by virtue of allowing more parallelism
* between the GPU and CPU, although no clear evidence of that has been seen
* yet. */
#define WINED3D_PERIODIC_SUBMIT_WORK_COUNT 512
#define WINED3D_PERIODIC_SUBMIT_MAX_BUFFERS 3
/* Decide whether the current command buffer should be submitted early.
 *
 * Returns true only when both of the following hold:
 *   - at least WINED3D_PERIODIC_SUBMIT_WORK_COUNT draw/dispatch commands have
 *     been counted in context_vk->command_buffer_work_count;
 *   - the number of command buffer ids lying strictly between the last
 *     completed id and the current id does not exceed
 *     WINED3D_PERIODIC_SUBMIT_MAX_BUFFERS.
 *
 * Emits a TRACE message when it decides to submit; otherwise has no side
 * effects. */
static bool should_periodic_submit(struct wined3d_context_vk *context_vk)
{
    const unsigned int work_count = context_vk->command_buffer_work_count;
    uint64_t in_flight;

    if (work_count >= WINED3D_PERIODIC_SUBMIT_WORK_COUNT)
    {
        /* Periodic submission exists to keep the GPU busy. If more than
         * WINED3D_PERIODIC_SUBMIT_MAX_BUFFERS buffers are still outstanding,
         * it already has work queued, so another submission is not needed
         * right now. */
        in_flight = context_vk->current_command_buffer.id - context_vk->completed_command_buffer_id - 1;
        if (in_flight <= WINED3D_PERIODIC_SUBMIT_MAX_BUFFERS)
        {
            TRACE("Periodically submitting command buffer, %u draw/dispatch commands since last buffer, %I64u currently busy.\n",
                    work_count, in_flight);
            return true;
        }
    }

    return false;
}
VkCommandBuffer wined3d_context_vk_get_command_buffer(struct wined3d_context_vk *context_vk) VkCommandBuffer wined3d_context_vk_get_command_buffer(struct wined3d_context_vk *context_vk)
{ {
struct wined3d_device_vk *device_vk = wined3d_device_vk(context_vk->c.device); struct wined3d_device_vk *device_vk = wined3d_device_vk(context_vk->c.device);
...@@ -1785,7 +1816,7 @@ VkCommandBuffer wined3d_context_vk_get_command_buffer(struct wined3d_context_vk ...@@ -1785,7 +1816,7 @@ VkCommandBuffer wined3d_context_vk_get_command_buffer(struct wined3d_context_vk
buffer = &context_vk->current_command_buffer; buffer = &context_vk->current_command_buffer;
if (buffer->vk_command_buffer) if (buffer->vk_command_buffer)
{ {
if (context_vk->retired_bo_size > WINED3D_RETIRED_BO_SIZE_THRESHOLD) if (context_vk->retired_bo_size > WINED3D_RETIRED_BO_SIZE_THRESHOLD || should_periodic_submit(context_vk))
wined3d_context_vk_submit_command_buffer(context_vk, 0, NULL, NULL, 0, NULL); wined3d_context_vk_submit_command_buffer(context_vk, 0, NULL, NULL, 0, NULL);
else else
{ {
...@@ -1854,6 +1885,8 @@ VkCommandBuffer wined3d_context_vk_get_command_buffer(struct wined3d_context_vk ...@@ -1854,6 +1885,8 @@ VkCommandBuffer wined3d_context_vk_get_command_buffer(struct wined3d_context_vk
wined3d_query_vk_resume(query_vk, context_vk); wined3d_query_vk_resume(query_vk, context_vk);
} }
context_vk->command_buffer_work_count = 0;
TRACE("Created new command buffer %p with id 0x%s.\n", TRACE("Created new command buffer %p with id 0x%s.\n",
buffer->vk_command_buffer, wine_dbgstr_longlong(buffer->id)); buffer->vk_command_buffer, wine_dbgstr_longlong(buffer->id));
......
...@@ -614,6 +614,9 @@ struct wined3d_context_vk ...@@ -614,6 +614,9 @@ struct wined3d_context_vk
struct wined3d_command_buffer_vk current_command_buffer; struct wined3d_command_buffer_vk current_command_buffer;
uint64_t completed_command_buffer_id; uint64_t completed_command_buffer_id;
VkDeviceSize retired_bo_size; VkDeviceSize retired_bo_size;
/* Number of draw or dispatch calls that have been recorded into the
* current command buffer. */
unsigned int command_buffer_work_count;
struct struct
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment