Skip to content

Commit

Permalink
vulkan: support timers on async transfer queues
Browse files Browse the repository at this point in the history
This requires using host query resets, which requires a new extension.
The extension in question is also promoted to vulkan API version 1.2,
but for some reason, loading the function pointer under the old name
fails, even though the extension text seems to suggest that it should be
available under the new name as well. (But I think this might be a
loader bug). Work around it by just annoyingly introducing the concept
of function aliases.

Side note, the validation layers think this is an error because they're
too old to know about host query resets. I think this commit gives the
term "bleeding edge" a new meaning.
  • Loading branch information
haasn committed May 25, 2020
1 parent 6076fca commit cd05aa7
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 23 deletions.
6 changes: 6 additions & 0 deletions src/vulkan/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@
#define PL_VK_MAX_QUEUED_CMDS 1024
#define PL_VK_MAX_PENDING_CMDS 1024

// Shitty compatibility alias for very old vulkan.h versions
#ifndef VK_API_VERSION_1_2
#define VK_API_VERSION_1_2 VK_MAKE_VERSION(1, 2, 0)
#endif

// Shared struct used to hold vulkan context information
struct vk_ctx {
void *ta; // allocations bound to the lifetime of this vk_ctx
Expand Down Expand Up @@ -205,6 +210,7 @@ struct vk_ctx {
VK_FUN(QueueSubmit);
VK_FUN(ResetEvent);
VK_FUN(ResetFences);
VK_FUN(ResetQueryPoolEXT);
VK_FUN(SetDebugUtilsObjectNameEXT);
VK_FUN(SetHdrMetadataEXT);
VK_FUN(UpdateDescriptorSets);
Expand Down
25 changes: 25 additions & 0 deletions src/vulkan/context.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const struct pl_vk_inst_params pl_vk_inst_default_params = {0};

struct vk_fun {
const char *name;
const char *alias;
size_t offset;
bool device_level;
};
Expand All @@ -45,6 +46,13 @@ struct vk_ext {
.device_level = true, \
}

#define VK_DEV_FUN_ALIAS(N, ALIAS) \
{ .name = "vk" #N, \
.alias = #ALIAS, \
.offset = offsetof(struct vk_ctx, N), \
.device_level = true, \
}

// Table of optional vulkan instance extensions
static const char *vk_instance_extensions[] = {
VK_KHR_SURFACE_EXTENSION_NAME,
Expand Down Expand Up @@ -156,6 +164,13 @@ static const struct vk_ext vk_device_extensions[] = {
VK_DEV_FUN(SetHdrMetadataEXT),
{0},
},
}, {
.name = VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
.core_ver = VK_API_VERSION_1_2,
.funs = (struct vk_fun[]) {
VK_DEV_FUN_ALIAS(ResetQueryPoolEXT, vkResetQueryPool),
{0},
},
},
};

Expand All @@ -173,13 +188,21 @@ const char * const pl_vulkan_recommended_extensions[] = {
#endif
VK_EXT_PCI_BUS_INFO_EXTENSION_NAME,
VK_EXT_HDR_METADATA_EXTENSION_NAME,
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
};

const int pl_vulkan_num_recommended_extensions =
PL_ARRAY_SIZE(pl_vulkan_recommended_extensions);

// pNext chain of features we want enabled
static const VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT,
.hostQueryReset = true,
};

const VkPhysicalDeviceFeatures2KHR pl_vulkan_recommended_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR,
.pNext = (void *) &host_query_reset,
.features = {
.shaderImageGatherExtended = true,

Expand Down Expand Up @@ -1161,6 +1184,8 @@ static bool device_init(struct vk_ctx *vk, const struct pl_vulkan_params *params
PFN_vkVoidFunction *pfn = (void *) ((uintptr_t) vk + (ptrdiff_t) fun->offset);
if (fun->device_level) {
*pfn = vk->GetDeviceProcAddr(vk->dev, fun->name);
if (fun->alias && !*pfn)
*pfn = vk->GetDeviceProcAddr(vk->dev, fun->alias);
} else {
*pfn = vk->GetInstanceProcAddr(vk->inst, fun->name);
};
Expand Down
73 changes: 50 additions & 23 deletions src/vulkan/gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,10 @@ struct pl_vk {
struct vk_malloc *alloc;
struct spirv_compiler *spirv;

// Some additional cached device limits
// Some additional cached device limits and features checks
uint32_t max_push_descriptors;
size_t min_texel_alignment;
bool host_query_reset;

// This is a pl_dispatch used (on ourselves!) for the purposes of
// dispatching compute shaders for performing various emulation tasks
Expand Down Expand Up @@ -119,7 +120,7 @@ static inline bool supports_marks(struct vk_cmd *cmd) {
} while (0)

#define MAKE_LAZY_DESTRUCTOR(fun, argtype) \
static void fun##_lazy(const struct pl_gpu *gpu, const argtype *arg) { \
static void fun##_lazy(const struct pl_gpu *gpu, argtype *arg) { \
struct pl_vk *p = TA_PRIV(gpu); \
struct vk_ctx *vk = p->vk; \
if (p->cmd) { \
Expand Down Expand Up @@ -419,6 +420,15 @@ const struct pl_gpu *pl_gpu_create_vk(struct vk_ctx *vk)
p->max_push_descriptors = pushd.maxPushDescriptors;
}

if (vk->ResetQueryPoolEXT) {
const VkPhysicalDeviceHostQueryResetFeaturesEXT *host_query_reset;
host_query_reset = vk_find_struct(&vk->features,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT);

if (host_query_reset)
p->host_query_reset = host_query_reset->hostQueryReset;
}

// We ostensibly support this, although it can still fail on buffer
// creation (for certain combinations of buffers)
gpu->caps |= PL_GPU_CAP_MAPPED_BUFFERS;
Expand Down Expand Up @@ -678,7 +688,7 @@ static void vk_tex_destroy(const struct pl_gpu *gpu, struct pl_tex *tex)
talloc_free(tex);
}

MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct pl_tex)
MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, const struct pl_tex)

static const VkFilter filters[] = {
[PL_TEX_SAMPLE_NEAREST] = VK_FILTER_NEAREST,
Expand Down Expand Up @@ -1487,7 +1497,7 @@ static void buf_flush(const struct pl_gpu *gpu, struct vk_cmd *cmd,
}

#define vk_buf_destroy vk_buf_deref
MAKE_LAZY_DESTRUCTOR(vk_buf_destroy, struct pl_buf)
MAKE_LAZY_DESTRUCTOR(vk_buf_destroy, const struct pl_buf)

static void vk_buf_write(const struct pl_gpu *gpu, const struct pl_buf *buf,
size_t offset, const void *data, size_t size)
Expand Down Expand Up @@ -2084,7 +2094,7 @@ static void vk_pass_destroy(const struct pl_gpu *gpu, struct pl_pass *pass)
talloc_free(pass);
}

MAKE_LAZY_DESTRUCTOR(vk_pass_destroy, struct pl_pass)
MAKE_LAZY_DESTRUCTOR(vk_pass_destroy, const struct pl_pass)

static const VkDescriptorType dsType[] = {
[PL_DESC_SAMPLED_TEX] = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
Expand Down Expand Up @@ -3079,37 +3089,37 @@ static bool vk_tex_export(const struct pl_gpu *gpu, const struct pl_tex *tex,
#define VK_QUERY_POOL_SIZE 16

struct pl_timer {
int refcount;
bool recording; // true between vk_cmd_timer_begin() and vk_cmd_timer_end()
VkQueryPool qpool; // even=start, odd=stop
int index_write; // next index to write to
int index_read; // next index to read from
uint64_t pending; // bitmask of queries that are still running
};

static inline uint64_t timer_bit(int index)
{
return 1llu << (index / 2);
}

static void vk_timer_destroy(const struct pl_gpu *gpu, struct pl_timer *timer)
{
struct pl_vk *p = TA_PRIV(gpu);
struct vk_ctx *vk = p->vk;

pl_assert(!timer->pending);
vk->DestroyQueryPool(vk->dev, timer->qpool, VK_ALLOC);
talloc_free(timer);
}

static void vk_timer_deref(const struct pl_gpu *gpu, struct pl_timer *timer)
{
if (--timer->refcount == 0)
vk_timer_destroy(gpu, timer);
}
MAKE_LAZY_DESTRUCTOR(vk_timer_destroy, struct pl_timer)

static struct pl_timer *vk_timer_create(const struct pl_gpu *gpu)
{
struct pl_vk *p = TA_PRIV(gpu);
struct vk_ctx *vk = p->vk;

struct pl_timer *timer = talloc_ptrtype(NULL, timer);
*timer = (struct pl_timer) {
.refcount = 1,
};
*timer = (struct pl_timer) {0};

struct VkQueryPoolCreateInfo qinfo = {
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
Expand Down Expand Up @@ -3167,19 +3177,36 @@ static void vk_cmd_timer_begin(const struct pl_gpu *gpu, struct vk_cmd *cmd,
return;
}

vk_poll_commands(vk, 0);
if (timer->pending & timer_bit(timer->index_write))
return; // next query is still running, skip this timer

VkQueueFlags reset_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
if (!(cmd->pool->props.queueFlags & reset_flags)) {
PL_TRACE(gpu, "QF %d does not support query pool resets", cmd->pool->qf);
if (cmd->pool->props.queueFlags & reset_flags) {
// Use direct command buffer resets
vk->CmdResetQueryPool(cmd->buf, timer->qpool, timer->index_write, 2);
} else if (p->host_query_reset) {
// Use host query resets
vk->ResetQueryPoolEXT(vk->dev, timer->qpool, timer->index_write, 2);
} else {
PL_TRACE(gpu, "QF %d supports no mechanism for resetting queries",
cmd->pool->qf);
return;
}

vk->CmdResetQueryPool(cmd->buf, timer->qpool, timer->index_write, 2);
vk->CmdWriteTimestamp(cmd->buf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
timer->qpool, timer->index_write);

timer->recording = true;
}

static void vk_timer_cb(void *ptimer, void *pindex)
{
struct pl_timer *timer = ptimer;
int index = (uintptr_t) pindex;
timer->pending &= ~timer_bit(index);
}

static void vk_cmd_timer_end(const struct pl_gpu *gpu, struct vk_cmd *cmd,
struct pl_timer *timer)
{
Expand All @@ -3192,18 +3219,18 @@ static void vk_cmd_timer_end(const struct pl_gpu *gpu, struct vk_cmd *cmd,
vk->CmdWriteTimestamp(cmd->buf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
timer->qpool, timer->index_write + 1);

timer->recording = false;
timer->pending |= timer_bit(timer->index_write);
vk_cmd_callback(cmd, (vk_cb) vk_timer_cb, timer,
(void *) (uintptr_t) timer->index_write);

timer->index_write = (timer->index_write + 2) % VK_QUERY_POOL_SIZE;
if (timer->index_write == timer->index_read) {
// forcibly drop the least recent result to make space
timer->index_read = (timer->index_read + 2) % VK_QUERY_POOL_SIZE;
}

timer->recording = false;
timer->refcount++;
vk_cmd_callback(cmd, (vk_cb) vk_timer_deref, gpu, timer);
}


static void vk_gpu_flush(const struct pl_gpu *gpu)
{
struct pl_vk *p = TA_PRIV(gpu);
Expand Down Expand Up @@ -3252,7 +3279,7 @@ static const struct pl_gpu_fns pl_fns_vk = {
.sync_destroy = vk_sync_deref,
.tex_export = vk_tex_export,
.timer_create = vk_timer_create,
.timer_destroy = vk_timer_deref,
.timer_destroy = vk_timer_destroy_lazy,
.timer_query = vk_timer_query,
.gpu_flush = vk_gpu_flush,
.gpu_finish = vk_gpu_finish,
Expand Down

0 comments on commit cd05aa7

Please sign in to comment.