From 27c178861a2bfcaee5034e65fbd247ee4ce9ccda Mon Sep 17 00:00:00 2001 From: Evan Hemsley Date: Mon, 18 May 2026 16:36:21 -0700 Subject: [PATCH 1/5] GPU: Query API --- include/SDL3/SDL_gpu.h | 155 ++++++++++++++++ src/gpu/SDL_gpu.c | 103 +++++++++++ src/gpu/SDL_sysgpu.h | 30 ++++ src/gpu/vulkan/SDL_gpu_vulkan.c | 229 ++++++++++++++++++++++++ src/gpu/vulkan/SDL_gpu_vulkan_vkfuncs.h | 6 + 5 files changed, 523 insertions(+) diff --git a/include/SDL3/SDL_gpu.h b/include/SDL3/SDL_gpu.h index 6cdd8b6a39..040b476675 100644 --- a/include/SDL3/SDL_gpu.h +++ b/include/SDL3/SDL_gpu.h @@ -597,6 +597,20 @@ typedef struct SDL_GPUCopyPass SDL_GPUCopyPass; */ typedef struct SDL_GPUFence SDL_GPUFence; +/** + * An opaque handle representing a query pool. + * + * \since This struct is available since SDL 3.6.0. + * + * \sa SDL_CreateGPUQueryPool + * \sa SDL_ReleaseGPUQueryPool + * \sa SDL_BeginGPUQuery + * \sa SDL_EndGPUQuery + * \sa SDL_CopyGPUQueryResultsToBuffer + * \sa SDL_GetGPUTimestampFrequency + */ +typedef struct SDL_GPUQueryPool SDL_GPUQueryPool; + /** * Specifies the primitive topology of a graphics pipeline. * @@ -1370,6 +1384,20 @@ typedef enum SDL_GPUSwapchainComposition SDL_GPU_SWAPCHAINCOMPOSITION_HDR10_ST2084 } SDL_GPUSwapchainComposition; +/** + * Specifies a kind of GPU Query. + * + * \since This enum is available since SDL 3.6.0. + * + * \sa SDL_CreateGPUQueryPool + */ +typedef enum SDL_GPUQueryType +{ + SDL_GPU_QUERY_TIMESTAMP, + SDL_GPU_QUERY_BINARY_OCCLUSION, + SDL_GPU_QUERY_PRECISE_OCCLUSION +} SDL_GPUQueryType; + /* Structures */ /** @@ -1816,6 +1844,21 @@ typedef struct SDL_GPUTransferBufferCreateInfo SDL_PropertiesID props; /**< A properties ID for extensions. Should be 0 if no extensions are needed. */ } SDL_GPUTransferBufferCreateInfo; +/** + * A structure specifying the parameters of a query pool. + * + * \since This struct is available since SDL 3.6.0. 
+ * + * \sa SDL_CreateGPUQueryPool + */ +typedef struct SDL_GPUQueryPoolCreateInfo +{ + SDL_GPUQueryType type; /**< The type of query intended to be used by the client. */ + Uint32 query_count; /**< The maximum number of queries in the pool. */ + + SDL_PropertiesID props; /**< A properties ID for extensions. Should be 0 if no extensions are needed. */ +} SDL_GPUQueryPoolCreateInfo; + /* Pipeline state structures */ /** @@ -4028,6 +4071,30 @@ extern SDL_DECLSPEC void SDLCALL SDL_DownloadFromGPUBuffer( const SDL_GPUBufferRegion *source, const SDL_GPUTransferBufferLocation *destination); +/** + * Copies results of a GPU query to a buffer. + * + * This data is not guaranteed to be copied until the command buffer fence is + * signaled. + * + * After this function is called, the data in the query pool is no longer valid, + * so don't call this function multiple times before performing another query. + * + * \param copy_pass a copy pass handle. + * \param pool a query pool handle. + * \param first_query starting index of the queries to copy. + * \param count the number of queries to copy. + * \param destination the destination buffer and offset. + * + * \since This function is available since SDL 3.6.0. + */ +extern SDL_DECLSPEC void SDLCALL SDL_CopyGPUQueryResultsToBuffer( + SDL_GPUCopyPass *copy_pass, + SDL_GPUQueryPool *pool, + Uint32 first_query, + Uint32 count, + SDL_GPUBufferLocation *destination); + /** * Ends the current copy pass. * @@ -4492,6 +4559,94 @@ extern SDL_DECLSPEC void SDLCALL SDL_ReleaseGPUFence( SDL_GPUDevice *device, SDL_GPUFence *fence); +/** + * Gets the GPU timestamp period (nanoseconds per timestamp tick). + * + * Multiply a timestamp difference by this value to convert it to nanoseconds of wall clock time. + * + * \param device a GPU context. + * \returns the number of nanoseconds required for a timestamp query to be incremented by 1. + * + * \since This function is available since SDL 3.6.0. 
+ * + * \sa SDL_CreateGPUQueryPool + */ +extern SDL_DECLSPEC Uint64 SDLCALL SDL_GetGPUTimestampFrequency(SDL_GPUDevice *device); + +/** + * Creates a query pool object to be used in queries. + * + * \param device a GPU context. + * \param createinfo a struct describing the state of the pool to create. + * \returns a query pool object on success, or NULL on failure; call + * SDL_GetError() for more information. + * + * \since This function is available since SDL 3.6.0. + * + * \sa SDL_GetGPUTimestampFrequency + * \sa SDL_BeginGPUQuery + * \sa SDL_EndGPUQuery + * \sa SDL_CopyGPUQueryResultsToBuffer + * \sa SDL_ReleaseGPUQueryPool + */ +extern SDL_DECLSPEC SDL_GPUQueryPool * SDLCALL SDL_CreateGPUQueryPool( + SDL_GPUDevice *device, + SDL_GPUQueryPoolCreateInfo *createinfo); + +/** + * Begins a query on a command buffer. + * + * For timestamp queries, this will produce a timestamp as soon as all previous commands are taken by the command queue. + * Note that this means for timestamp queries you should use a different index from the one you use in SDL_EndGPUQuery. + * + * \param command_buffer a command buffer. + * \param pool a query pool. + * \param index the index within the pool for the query. + * + * \since This function is available since SDL 3.6.0. + * + * \sa SDL_EndGPUQuery + */ +extern SDL_DECLSPEC void SDLCALL SDL_BeginGPUQuery( + SDL_GPUCommandBuffer *command_buffer, + SDL_GPUQueryPool *pool, + Uint32 index); + +/** + * Ends a query on a command buffer. + * + * For timestamp queries, this will produce a timestamp as soon as all previous commands are finished in the command queue. + * Note that this means for timestamp queries you should use a different index from the one you used in SDL_BeginGPUQuery. + * + * \param command_buffer a command buffer. + * \param pool a query pool. + * \param index the index within the pool for the query. + * + * \since This function is available since SDL 3.6.0. 
+ * + * \sa SDL_BeginGPUQuery + */ +extern SDL_DECLSPEC void SDLCALL SDL_EndGPUQuery( + SDL_GPUCommandBuffer *command_buffer, + SDL_GPUQueryPool *pool, + Uint32 index); + +/** + * Frees the given query pool as soon as it is safe to do so. + * + * You must not reference the query pool after calling this function. + * + * \param device a GPU context. + * \param pool a query pool. + * + * \since This function is available since SDL 3.6.0. + * + * \sa SDL_CreateGPUQueryPool + */ +extern SDL_DECLSPEC void SDLCALL SDL_ReleaseGPUQueryPool( + SDL_GPUDevice *device, + SDL_GPUQueryPool *pool); + /* Format Info */ /** diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index 01e1eb5e9c..bc7c106ef3 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -3020,6 +3020,36 @@ void SDL_DownloadFromGPUBuffer( destination); } +void SDL_CopyGPUQueryResultsToBuffer( + SDL_GPUCopyPass *copy_pass, + SDL_GPUQueryPool *pool, + Uint32 first_query, + Uint32 count, + SDL_GPUBufferLocation *destination) +{ + CHECK_PARAM(copy_pass == NULL) { + SDL_InvalidParamError("copy_pass"); + return; + } + + CHECK_PARAM(pool == NULL) { + SDL_InvalidParamError("pool"); + return; + } + + CHECK_PARAM(destination == NULL) { + SDL_InvalidParamError("destination"); + return; + } + + COPYPASS_DEVICE->CopyQueryResultsToBuffer( + COPYPASS_COMMAND_BUFFER, + pool, + first_query, + count, + destination); +} + void SDL_EndGPUCopyPass( SDL_GPUCopyPass *copy_pass) { @@ -3511,6 +3541,79 @@ void SDL_ReleaseGPUFence( fence); } +SDL_GPUQueryPool *SDL_CreateGPUQueryPool( + SDL_GPUDevice *device, + SDL_GPUQueryPoolCreateInfo *createinfo) +{ + CHECK_DEVICE_MAGIC(device, NULL); + + CHECK_PARAM(createinfo == NULL) { + SDL_InvalidParamError("createinfo"); + return NULL; + } + + return device->CreateQueryPool( + device->driverData, + createinfo); +} + +void SDL_BeginGPUQuery( + SDL_GPUCommandBuffer *command_buffer, + SDL_GPUQueryPool *pool, + Uint32 index) +{ + CHECK_PARAM(command_buffer == NULL) { + 
SDL_InvalidParamError("command_buffer"); + return; + } + + CHECK_PARAM(pool == NULL) { + SDL_InvalidParamError("pool"); + return; + } + + COMMAND_BUFFER_DEVICE->BeginQuery( + command_buffer, + pool, + index); +} + +void SDL_EndGPUQuery( + SDL_GPUCommandBuffer *command_buffer, + SDL_GPUQueryPool *pool, + Uint32 index) +{ + CHECK_PARAM(command_buffer == NULL) { + SDL_InvalidParamError("command_buffer"); + return; + } + + CHECK_PARAM(pool == NULL) { + SDL_InvalidParamError("pool"); + return; + } + + COMMAND_BUFFER_DEVICE->EndQuery( + command_buffer, + pool, + index); +} + +void SDL_ReleaseGPUQueryPool( + SDL_GPUDevice *device, + SDL_GPUQueryPool *pool) +{ + CHECK_DEVICE_MAGIC(device, ); + + CHECK_PARAM(pool == NULL) { + return; + } + + device->ReleaseQueryPool( + device->driverData, + pool); +} + Uint32 SDL_CalculateGPUTextureFormatSize( SDL_GPUTextureFormat format, Uint32 width, diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h index 604be78b5b..91f78b537b 100644 --- a/src/gpu/SDL_sysgpu.h +++ b/src/gpu/SDL_sysgpu.h @@ -995,6 +995,13 @@ struct SDL_GPUDevice Uint32 size, bool cycle); + void (*CopyQueryResultsToBuffer)( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUQueryPool *pool, + Uint32 first_query, + Uint32 count, + const SDL_GPUBufferLocation *destination); + void (*GenerateMipmaps)( SDL_GPUCommandBuffer *commandBuffer, SDL_GPUTexture *texture); @@ -1097,6 +1104,24 @@ struct SDL_GPUDevice SDL_GPURenderer *driverData, SDL_GPUFence *fence); + SDL_GPUQueryPool *(*CreateQueryPool)( + SDL_GPURenderer *driverData, + SDL_GPUQueryPoolCreateInfo *createinfo); + + void (*BeginQuery)( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUQueryPool *pool, + Uint32 index); + + void (*EndQuery)( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUQueryPool *pool, + Uint32 index); + + void (*ReleaseQueryPool)( + SDL_GPURenderer *driverData, + SDL_GPUQueryPool *pool); + // Feature Queries bool (*SupportsTextureFormat)( @@ -1193,6 +1218,7 @@ struct SDL_GPUDevice 
ASSIGN_DRIVER_FUNC(DownloadFromBuffer, name) \ ASSIGN_DRIVER_FUNC(CopyTextureToTexture, name) \ ASSIGN_DRIVER_FUNC(CopyBufferToBuffer, name) \ + ASSIGN_DRIVER_FUNC(CopyQueryResultsToBuffer, name) \ ASSIGN_DRIVER_FUNC(GenerateMipmaps, name) \ ASSIGN_DRIVER_FUNC(EndCopyPass, name) \ ASSIGN_DRIVER_FUNC(Blit, name) \ @@ -1214,6 +1240,10 @@ struct SDL_GPUDevice ASSIGN_DRIVER_FUNC(WaitForFences, name) \ ASSIGN_DRIVER_FUNC(QueryFence, name) \ ASSIGN_DRIVER_FUNC(ReleaseFence, name) \ + ASSIGN_DRIVER_FUNC(CreateQueryPool, name) \ + ASSIGN_DRIVER_FUNC(BeginQuery, name) \ + ASSIGN_DRIVER_FUNC(EndQuery, name) \ + ASSIGN_DRIVER_FUNC(ReleaseQueryPool, name) \ ASSIGN_DRIVER_FUNC(SupportsTextureFormat, name) \ ASSIGN_DRIVER_FUNC(SupportsSampleCount, name) diff --git a/src/gpu/vulkan/SDL_gpu_vulkan.c b/src/gpu/vulkan/SDL_gpu_vulkan.c index 3b9efe5c45..a2c00b8b57 100644 --- a/src/gpu/vulkan/SDL_gpu_vulkan.c +++ b/src/gpu/vulkan/SDL_gpu_vulkan.c @@ -468,6 +468,12 @@ static VkSamplerAddressMode SDLToVK_SamplerAddressMode[] = { VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE }; +static VkQueryType SDLToVK_QueryType[] = { + VK_QUERY_TYPE_TIMESTAMP, + VK_QUERY_TYPE_OCCLUSION, + VK_QUERY_TYPE_OCCLUSION +}; + // Structures typedef struct VulkanRenderer VulkanRenderer; @@ -967,6 +973,13 @@ typedef struct FramebufferHashTableKey Uint32 height; } FramebufferHashTableKey; +typedef struct VulkanQueryPool +{ + VkQueryPool pool; + SDL_GPUQueryType type; + SDL_AtomicInt referenceCount; +} VulkanQueryPool; + // Command structures typedef struct VulkanFencePool @@ -1110,6 +1123,10 @@ typedef struct VulkanCommandBuffer Sint32 usedComputePipelineCount; Sint32 usedComputePipelineCapacity; + VulkanQueryPool **usedQueryPools; + Sint32 usedQueryPoolCount; + Sint32 usedQueryPoolCapacity; + VulkanFramebuffer **usedFramebuffers; Sint32 usedFramebufferCount; Sint32 usedFramebufferCapacity; @@ -1251,6 +1268,10 @@ struct VulkanRenderer Uint32 shadersToDestroyCount; Uint32 shadersToDestroyCapacity; + VulkanQueryPool 
**queryPoolsToDestroy; + Uint32 queryPoolsToDestroyCount; + Uint32 queryPoolsToDestroyCapacity; + VulkanFramebuffer **framebuffersToDestroy; Uint32 framebuffersToDestroyCount; Uint32 framebuffersToDestroyCapacity; @@ -2558,6 +2579,19 @@ static void VULKAN_INTERNAL_TrackComputePipeline( computePipeline->referenceCount); } +static void VULKAN_INTERNAL_TrackQueryPool( + VulkanCommandBuffer *commandBuffer, + VulkanQueryPool *pool) +{ + TRACK_RESOURCE( + pool, + VulkanQueryPool *, + usedQueryPools, + usedQueryPoolCount, + usedQueryPoolCapacity, + pool->referenceCount); +} + static void VULKAN_INTERNAL_TrackFramebuffer( VulkanCommandBuffer *commandBuffer, VulkanFramebuffer *framebuffer) @@ -3246,6 +3280,7 @@ static void VULKAN_INTERNAL_DestroyCommandPool( SDL_free(commandBuffer->usedSamplers); SDL_free(commandBuffer->usedGraphicsPipelines); SDL_free(commandBuffer->usedComputePipelines); + SDL_free(commandBuffer->usedQueryPools); SDL_free(commandBuffer->usedFramebuffers); SDL_free(commandBuffer->usedUniformBuffers); @@ -3335,6 +3370,18 @@ static void VULKAN_INTERNAL_DestroySampler( SDL_free(vulkanSampler); } +static void VULKAN_INTERNAL_DestroyQueryPool( + VulkanRenderer *renderer, + VulkanQueryPool *vulkanQueryPool) +{ + renderer->vkDestroyQueryPool( + renderer->logicalDevice, + vulkanQueryPool->pool, + NULL); + + SDL_free(vulkanQueryPool); +} + static void VULKAN_INTERNAL_DestroySwapchainImage( VulkanRenderer *renderer, WindowData *windowData) @@ -5095,6 +5142,7 @@ static void VULKAN_DestroyDevice( SDL_free(renderer->computePipelinesToDestroy); SDL_free(renderer->shadersToDestroy); SDL_free(renderer->samplersToDestroy); + SDL_free(renderer->queryPoolsToDestroy); SDL_free(renderer->framebuffersToDestroy); SDL_free(renderer->allocationsToDefrag); @@ -7075,6 +7123,40 @@ static SDL_GPUTransferBuffer *VULKAN_CreateTransferBuffer( debugName); } +static SDL_GPUQueryPool *VULKAN_CreateQueryPool( + SDL_GPURenderer *driverData, + SDL_GPUQueryPoolCreateInfo *createinfo) +{ + 
VulkanRenderer *renderer = (VulkanRenderer *)driverData; + VkQueryPoolCreateInfo vkQueryPoolCreateInfo; + VkResult result; + VulkanQueryPool *pool = SDL_malloc(sizeof(VulkanQueryPool)); + + vkQueryPoolCreateInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + vkQueryPoolCreateInfo.pNext = NULL; + vkQueryPoolCreateInfo.flags = VK_QUERY_POOL_CREATE_RESET_BIT_KHR; + vkQueryPoolCreateInfo.pipelineStatistics = 0; + vkQueryPoolCreateInfo.queryCount = createinfo->query_count; + vkQueryPoolCreateInfo.queryType = SDLToVK_QueryType[createinfo->type]; + + result = renderer->vkCreateQueryPool( + renderer->logicalDevice, + &vkQueryPoolCreateInfo, + NULL, + &pool->pool + ); + + if (result != VK_SUCCESS) { + SDL_free(pool); + CHECK_VULKAN_ERROR_AND_RETURN(result, vkCreateQueryPool, NULL); + } + + SDL_SetAtomicInt(&pool->referenceCount, 0); + pool->type = createinfo->type; + + return (SDL_GPUQueryPool *)pool; +} + static void VULKAN_INTERNAL_ReleaseTexture( VulkanRenderer *renderer, VulkanTexture *vulkanTexture) @@ -7285,6 +7367,28 @@ static void VULKAN_ReleaseGraphicsPipeline( SDL_UnlockMutex(renderer->disposeLock); } +static void VULKAN_ReleaseQueryPool( + SDL_GPURenderer *driverData, + SDL_GPUQueryPool *pool) +{ + VulkanRenderer *renderer = (VulkanRenderer *)driverData; + VulkanQueryPool *vulkanQueryPool = (VulkanQueryPool *)pool; + + SDL_LockMutex(renderer->disposeLock); + + EXPAND_ARRAY_IF_NEEDED( + renderer->queryPoolsToDestroy, + VulkanQueryPool *, + renderer->queryPoolsToDestroyCount + 1, + renderer->queryPoolsToDestroyCapacity, + renderer->queryPoolsToDestroyCapacity * 2); + + renderer->queryPoolsToDestroy[renderer->queryPoolsToDestroyCount] = vulkanQueryPool; + renderer->queryPoolsToDestroyCount += 1; + + SDL_UnlockMutex(renderer->disposeLock); +} + // Command Buffer render state static VkRenderPass VULKAN_INTERNAL_FetchRenderPass( @@ -9258,6 +9362,50 @@ static void VULKAN_CopyBufferToBuffer( SDL_UnlockRWLock(renderer->defragLock); } +static void 
VULKAN_CopyQueryResultsToBuffer( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUQueryPool *pool, + Uint32 firstQuery, + Uint32 count, + const SDL_GPUBufferLocation *destination) +{ + VulkanCommandBuffer *vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer; + VulkanRenderer *renderer = vulkanCommandBuffer->renderer; + VulkanQueryPool *vulkanQueryPool = (VulkanQueryPool *)pool; + VulkanBufferContainer *dstContainer = (VulkanBufferContainer *)destination->buffer; + + SDL_LockRWLockForReading(renderer->defragLock); + + VulkanBuffer *dstBuffer = VULKAN_INTERNAL_PrepareBufferForWrite( + renderer, + vulkanCommandBuffer, + dstContainer, + false, // TODO: should this function take a cycle param? + VULKAN_BUFFER_USAGE_MODE_COPY_DESTINATION); + + renderer->vkCmdCopyQueryPoolResults( + vulkanCommandBuffer->commandBuffer, + vulkanQueryPool->pool, + firstQuery, + count, + dstBuffer->buffer, + destination->offset, + 8, // Result for timing and occlusion is one 64-bit integer + VK_QUERY_RESULT_64_BIT); + + VULKAN_INTERNAL_BufferTransitionToDefaultUsage( + renderer, + vulkanCommandBuffer, + VULKAN_BUFFER_USAGE_MODE_COPY_DESTINATION, + dstBuffer); + + VULKAN_INTERNAL_TrackQueryPool(vulkanCommandBuffer, vulkanQueryPool); + VULKAN_INTERNAL_TrackBuffer(vulkanCommandBuffer, dstBuffer); + VULKAN_INTERNAL_TrackBufferTransfer(vulkanCommandBuffer, dstBuffer); + + SDL_UnlockRWLock(renderer->defragLock); +} + static void VULKAN_GenerateMipmaps( SDL_GPUCommandBuffer *commandBuffer, SDL_GPUTexture *texture) @@ -9597,6 +9745,11 @@ static bool VULKAN_INTERNAL_AllocateCommandBuffer( commandBuffer->usedComputePipelines = SDL_malloc( commandBuffer->usedComputePipelineCapacity * sizeof(VulkanComputePipeline *)); + commandBuffer->usedQueryPoolCapacity = 4; + commandBuffer->usedQueryPoolCount = 0; + commandBuffer->usedQueryPools = SDL_malloc( + commandBuffer->usedQueryPoolCapacity * sizeof(VulkanQueryPool *)); + commandBuffer->usedFramebufferCapacity = 4; commandBuffer->usedFramebufferCount = 
0; commandBuffer->usedFramebuffers = SDL_malloc( @@ -9864,6 +10017,65 @@ static void VULKAN_ReleaseFence( } } +static void VULKAN_BeginQuery( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUQueryPool *pool, + Uint32 index) +{ + VulkanCommandBuffer *vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer; + VulkanRenderer *renderer = vulkanCommandBuffer->renderer; + VulkanQueryPool *vulkanQueryPool = (VulkanQueryPool *)pool; + + // Timestamp queries don't begin and end, we just need a distinction between + // a timestamp written when preceding commands are taken and when preceding commands are finished. + if (vulkanQueryPool->type == SDL_GPU_QUERY_TIMESTAMP) { + renderer->vkCmdWriteTimestamp( + vulkanCommandBuffer->commandBuffer, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + vulkanQueryPool->pool, + index); + } + else { + renderer->vkCmdBeginQuery( + vulkanCommandBuffer->commandBuffer, + vulkanQueryPool->pool, + index, + vulkanQueryPool->type == SDL_GPU_QUERY_PRECISE_OCCLUSION ? VK_QUERY_CONTROL_PRECISE_BIT : 0); + } + + // Hold a reference so a released pool is not destroyed while this command buffer still uses it. + VULKAN_INTERNAL_TrackQueryPool(vulkanCommandBuffer, vulkanQueryPool); +} + +static void VULKAN_EndQuery( + SDL_GPUCommandBuffer *commandBuffer, + SDL_GPUQueryPool *pool, + Uint32 index) +{ + VulkanCommandBuffer *vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer; + VulkanRenderer *renderer = vulkanCommandBuffer->renderer; + VulkanQueryPool *vulkanQueryPool = (VulkanQueryPool *)pool; + + // Hold a reference so a released pool is not destroyed while this command buffer still uses it. + VULKAN_INTERNAL_TrackQueryPool(vulkanCommandBuffer, vulkanQueryPool); + + // Timestamp queries don't begin and end, we just need a distinction between + // a timestamp written when preceding commands are taken and when preceding commands are finished. 
+ if (vulkanQueryPool->type == SDL_GPU_QUERY_TIMESTAMP) { + renderer->vkCmdWriteTimestamp( + vulkanCommandBuffer->commandBuffer, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + vulkanQueryPool->pool, + index); + } + else { + renderer->vkCmdEndQuery( + vulkanCommandBuffer->commandBuffer, + vulkanQueryPool->pool, + index); + } +} + static WindowData *VULKAN_INTERNAL_FetchWindowData( SDL_Window *window) { @@ -10610,6 +10816,17 @@ static void VULKAN_INTERNAL_PerformPendingDestroys( } } + for (Sint32 i = renderer->queryPoolsToDestroyCount - 1; i >= 0; i -= 1){ + if (SDL_GetAtomicInt(&renderer->queryPoolsToDestroy[i]->referenceCount) == 0) { + VULKAN_INTERNAL_DestroyQueryPool( + renderer, + renderer->queryPoolsToDestroy[i]); + + renderer->queryPoolsToDestroy[i] = renderer->queryPoolsToDestroy[renderer->queryPoolsToDestroyCount - 1]; + renderer->queryPoolsToDestroyCount -= 1; + } + } + for (Sint32 i = renderer->framebuffersToDestroyCount - 1; i >= 0; i -= 1) { if (SDL_GetAtomicInt(&renderer->framebuffersToDestroy[i]->referenceCount) == 0) { VULKAN_INTERNAL_DestroyFramebuffer( @@ -10687,6 +10904,11 @@ static void VULKAN_INTERNAL_CleanCommandBuffer( } commandBuffer->usedComputePipelineCount = 0; + for (Sint32 i = 0; i < commandBuffer->usedQueryPoolCount; i += 1) { + (void)(SDL_AtomicDecRef(&commandBuffer->usedQueryPools[i]->referenceCount)); + } + commandBuffer->usedQueryPoolCount = 0; + for (Sint32 i = 0; i < commandBuffer->usedFramebufferCount; i += 1) { (void)SDL_AtomicDecRef(&commandBuffer->usedFramebuffers[i]->referenceCount); } @@ -13741,6 +13963,13 @@ static SDL_GPUDevice *VULKAN_CreateDevice(bool debugMode, bool preferLowPower, S sizeof(VulkanShader *) * renderer->shadersToDestroyCapacity); + renderer->queryPoolsToDestroyCapacity = 16; + renderer->queryPoolsToDestroyCount = 0; + + renderer->queryPoolsToDestroy = SDL_malloc( + sizeof(VulkanQueryPool *) * + renderer->queryPoolsToDestroyCapacity); + renderer->framebuffersToDestroyCapacity = 16; 
renderer->framebuffersToDestroyCount = 0; renderer->framebuffersToDestroy = SDL_malloc( diff --git a/src/gpu/vulkan/SDL_gpu_vulkan_vkfuncs.h b/src/gpu/vulkan/SDL_gpu_vulkan_vkfuncs.h index a56beff11e..8940316dcb 100644 --- a/src/gpu/vulkan/SDL_gpu_vulkan_vkfuncs.h +++ b/src/gpu/vulkan/SDL_gpu_vulkan_vkfuncs.h @@ -86,6 +86,7 @@ VULKAN_DEVICE_FUNCTION(vkBeginCommandBuffer) VULKAN_DEVICE_FUNCTION(vkBindBufferMemory) VULKAN_DEVICE_FUNCTION(vkBindImageMemory) VULKAN_DEVICE_FUNCTION(vkCmdBeginRenderPass) +VULKAN_DEVICE_FUNCTION(vkCmdBeginQuery) VULKAN_DEVICE_FUNCTION(vkCmdBindDescriptorSets) VULKAN_DEVICE_FUNCTION(vkCmdBindIndexBuffer) VULKAN_DEVICE_FUNCTION(vkCmdBindPipeline) @@ -98,6 +99,7 @@ VULKAN_DEVICE_FUNCTION(vkCmdCopyBuffer) VULKAN_DEVICE_FUNCTION(vkCmdCopyImage) VULKAN_DEVICE_FUNCTION(vkCmdCopyBufferToImage) VULKAN_DEVICE_FUNCTION(vkCmdCopyImageToBuffer) +VULKAN_DEVICE_FUNCTION(vkCmdCopyQueryPoolResults) VULKAN_DEVICE_FUNCTION(vkCmdDispatch) VULKAN_DEVICE_FUNCTION(vkCmdDispatchIndirect) VULKAN_DEVICE_FUNCTION(vkCmdDraw) @@ -105,6 +107,7 @@ VULKAN_DEVICE_FUNCTION(vkCmdDrawIndexed) VULKAN_DEVICE_FUNCTION(vkCmdDrawIndexedIndirect) VULKAN_DEVICE_FUNCTION(vkCmdDrawIndirect) VULKAN_DEVICE_FUNCTION(vkCmdEndRenderPass) +VULKAN_DEVICE_FUNCTION(vkCmdEndQuery) VULKAN_DEVICE_FUNCTION(vkCmdPipelineBarrier) VULKAN_DEVICE_FUNCTION(vkCmdResolveImage) VULKAN_DEVICE_FUNCTION(vkCmdSetBlendConstants) @@ -112,6 +115,7 @@ VULKAN_DEVICE_FUNCTION(vkCmdSetDepthBias) VULKAN_DEVICE_FUNCTION(vkCmdSetScissor) VULKAN_DEVICE_FUNCTION(vkCmdSetStencilReference) VULKAN_DEVICE_FUNCTION(vkCmdSetViewport) +VULKAN_DEVICE_FUNCTION(vkCmdWriteTimestamp) VULKAN_DEVICE_FUNCTION(vkCreateBuffer) VULKAN_DEVICE_FUNCTION(vkCreateCommandPool) VULKAN_DEVICE_FUNCTION(vkCreateDescriptorPool) @@ -128,6 +132,7 @@ VULKAN_DEVICE_FUNCTION(vkCreateRenderPass) VULKAN_DEVICE_FUNCTION(vkCreateSampler) VULKAN_DEVICE_FUNCTION(vkCreateSemaphore) VULKAN_DEVICE_FUNCTION(vkCreateShaderModule) 
+VULKAN_DEVICE_FUNCTION(vkCreateQueryPool) VULKAN_DEVICE_FUNCTION(vkDestroyBuffer) VULKAN_DEVICE_FUNCTION(vkDestroyCommandPool) VULKAN_DEVICE_FUNCTION(vkDestroyDescriptorPool) @@ -144,6 +149,7 @@ VULKAN_DEVICE_FUNCTION(vkDestroyRenderPass) VULKAN_DEVICE_FUNCTION(vkDestroySampler) VULKAN_DEVICE_FUNCTION(vkDestroySemaphore) VULKAN_DEVICE_FUNCTION(vkDestroyShaderModule) +VULKAN_DEVICE_FUNCTION(vkDestroyQueryPool) VULKAN_DEVICE_FUNCTION(vkDeviceWaitIdle) VULKAN_DEVICE_FUNCTION(vkEndCommandBuffer) VULKAN_DEVICE_FUNCTION(vkFreeCommandBuffers) From e594c6d193d1d0a27681bacccc4b5fe44369bbc4 Mon Sep 17 00:00:00 2001 From: Evan Hemsley Date: Mon, 18 May 2026 16:40:22 -0700 Subject: [PATCH 2/5] Add dynapi --- src/dynapi/SDL_dynapi.exports | 6 ++++++ src/dynapi/SDL_dynapi.sym | 6 ++++++ src/dynapi/SDL_dynapi_overrides.h | 6 ++++++ src/dynapi/SDL_dynapi_procs.h | 6 ++++++ 4 files changed, 24 insertions(+) diff --git a/src/dynapi/SDL_dynapi.exports b/src/dynapi/SDL_dynapi.exports index 9864557071..ff366516af 100644 --- a/src/dynapi/SDL_dynapi.exports +++ b/src/dynapi/SDL_dynapi.exports @@ -1290,3 +1290,9 @@ _SDL_LoadJPG _SDL_HasSVE2 _SDL_GamepadHasCapSense _SDL_GetGamepadCapSense +_SDL_CopyGPUQueryResultsToBuffer +_SDL_GetGPUTimestampFrequency +_SDL_CreateGPUQueryPool +_SDL_BeginGPUQuery +_SDL_EndGPUQuery +_SDL_ReleaseGPUQueryPool diff --git a/src/dynapi/SDL_dynapi.sym b/src/dynapi/SDL_dynapi.sym index 3958a52aa6..b3d1c29dec 100644 --- a/src/dynapi/SDL_dynapi.sym +++ b/src/dynapi/SDL_dynapi.sym @@ -1291,6 +1291,12 @@ SDL3_0.0.0 { SDL_HasSVE2; SDL_GamepadHasCapSense; SDL_GetGamepadCapSense; + SDL_CopyGPUQueryResultsToBuffer; + SDL_GetGPUTimestampFrequency; + SDL_CreateGPUQueryPool; + SDL_BeginGPUQuery; + SDL_EndGPUQuery; + SDL_ReleaseGPUQueryPool; # extra symbols go here (don't modify this line) local: *; }; diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h index b54d32ae6d..d97e515a12 100644 --- a/src/dynapi/SDL_dynapi_overrides.h +++ 
b/src/dynapi/SDL_dynapi_overrides.h @@ -1317,3 +1317,9 @@ #define SDL_HasSVE2 SDL_HasSVE2_REAL #define SDL_GamepadHasCapSense SDL_GamepadHasCapSense_REAL #define SDL_GetGamepadCapSense SDL_GetGamepadCapSense_REAL +#define SDL_CopyGPUQueryResultsToBuffer SDL_CopyGPUQueryResultsToBuffer_REAL +#define SDL_GetGPUTimestampFrequency SDL_GetGPUTimestampFrequency_REAL +#define SDL_CreateGPUQueryPool SDL_CreateGPUQueryPool_REAL +#define SDL_BeginGPUQuery SDL_BeginGPUQuery_REAL +#define SDL_EndGPUQuery SDL_EndGPUQuery_REAL +#define SDL_ReleaseGPUQueryPool SDL_ReleaseGPUQueryPool_REAL diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h index 4f8ac0ba0c..1da1797a95 100644 --- a/src/dynapi/SDL_dynapi_procs.h +++ b/src/dynapi/SDL_dynapi_procs.h @@ -1325,3 +1325,9 @@ SDL_DYNAPI_PROC(SDL_Surface*,SDL_LoadJPG,(const char *a),(a),return) SDL_DYNAPI_PROC(bool,SDL_HasSVE2,(void),(),return) SDL_DYNAPI_PROC(bool,SDL_GamepadHasCapSense,(SDL_Gamepad *a,SDL_GamepadCapSenseType b),(a,b),return) SDL_DYNAPI_PROC(bool,SDL_GetGamepadCapSense,(SDL_Gamepad *a,SDL_GamepadCapSenseType b),(a,b),return) +SDL_DYNAPI_PROC(void,SDL_CopyGPUQueryResultsToBuffer,(SDL_GPUCopyPass *a,SDL_GPUQueryPool *b,Uint32 c,Uint32 d,SDL_GPUBufferLocation *e),(a,b,c,d,e),) +SDL_DYNAPI_PROC(Uint64,SDL_GetGPUTimestampFrequency,(SDL_GPUDevice *a),(a),return) +SDL_DYNAPI_PROC(SDL_GPUQueryPool*,SDL_CreateGPUQueryPool,(SDL_GPUDevice *a,SDL_GPUQueryPoolCreateInfo *b),(a,b),return) +SDL_DYNAPI_PROC(void,SDL_BeginGPUQuery,(SDL_GPUCommandBuffer *a,SDL_GPUQueryPool *b,Uint32 c),(a,b,c),) +SDL_DYNAPI_PROC(void,SDL_EndGPUQuery,(SDL_GPUCommandBuffer *a,SDL_GPUQueryPool *b,Uint32 c),(a,b,c),) +SDL_DYNAPI_PROC(void,SDL_ReleaseGPUQueryPool,(SDL_GPUDevice *a,SDL_GPUQueryPool *b),(a,b),) From 100e3d8c5af92710b84d3299d6c89aa31176df83 Mon Sep 17 00:00:00 2001 From: Evan Hemsley Date: Mon, 18 May 2026 16:55:31 -0700 Subject: [PATCH 3/5] Change GetTimestampFrequency to return float --- include/SDL3/SDL_gpu.h | 2 +- 
src/dynapi/SDL_dynapi_procs.h | 2 +- src/gpu/SDL_gpu.c | 7 +++++++ src/gpu/SDL_sysgpu.h | 5 +++++ src/gpu/vulkan/SDL_gpu_vulkan.c | 6 ++++++ 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/include/SDL3/SDL_gpu.h b/include/SDL3/SDL_gpu.h index 040b476675..320db4e54b 100644 --- a/include/SDL3/SDL_gpu.h +++ b/include/SDL3/SDL_gpu.h @@ -4571,7 +4571,7 @@ extern SDL_DECLSPEC void SDLCALL SDL_ReleaseGPUFence( * * \sa SDL_CreateGPUQueryPool */ -extern SDL_DECLSPEC Uint64 SDLCALL SDL_GetGPUTimestampFrequency(SDL_GPUDevice *device); +extern SDL_DECLSPEC float SDLCALL SDL_GetGPUTimestampFrequency(SDL_GPUDevice *device); /** * Creates a query pool object to be used in queries. diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h index 1da1797a95..b1415eca63 100644 --- a/src/dynapi/SDL_dynapi_procs.h +++ b/src/dynapi/SDL_dynapi_procs.h @@ -1326,7 +1326,7 @@ SDL_DYNAPI_PROC(bool,SDL_HasSVE2,(void),(),return) SDL_DYNAPI_PROC(bool,SDL_GamepadHasCapSense,(SDL_Gamepad *a,SDL_GamepadCapSenseType b),(a,b),return) SDL_DYNAPI_PROC(bool,SDL_GetGamepadCapSense,(SDL_Gamepad *a,SDL_GamepadCapSenseType b),(a,b),return) SDL_DYNAPI_PROC(void,SDL_CopyGPUQueryResultsToBuffer,(SDL_GPUCopyPass *a,SDL_GPUQueryPool *b,Uint32 c,Uint32 d,SDL_GPUBufferLocation *e),(a,b,c,d,e),) -SDL_DYNAPI_PROC(Uint64,SDL_GetGPUTimestampFrequency,(SDL_GPUDevice *a),(a),return) +SDL_DYNAPI_PROC(float,SDL_GetGPUTimestampFrequency,(SDL_GPUDevice *a),(a),return) SDL_DYNAPI_PROC(SDL_GPUQueryPool*,SDL_CreateGPUQueryPool,(SDL_GPUDevice *a,SDL_GPUQueryPoolCreateInfo *b),(a,b),return) SDL_DYNAPI_PROC(void,SDL_BeginGPUQuery,(SDL_GPUCommandBuffer *a,SDL_GPUQueryPool *b,Uint32 c),(a,b,c),) SDL_DYNAPI_PROC(void,SDL_EndGPUQuery,(SDL_GPUCommandBuffer *a,SDL_GPUQueryPool *b,Uint32 c),(a,b,c),) diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index bc7c106ef3..914e77c29a 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -3541,6 +3541,13 @@ void SDL_ReleaseGPUFence( fence); } +float 
SDL_GetGPUTimestampFrequency(SDL_GPUDevice *device) +{ + CHECK_DEVICE_MAGIC(device, 0); + + return device->GetTimestampFrequency(device->driverData); +} + SDL_GPUQueryPool *SDL_CreateGPUQueryPool( SDL_GPUDevice *device, SDL_GPUQueryPoolCreateInfo *createinfo) diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h index 91f78b537b..eba51afdb2 100644 --- a/src/gpu/SDL_sysgpu.h +++ b/src/gpu/SDL_sysgpu.h @@ -1104,6 +1104,10 @@ struct SDL_GPUDevice SDL_GPURenderer *driverData, SDL_GPUFence *fence); + float (*GetTimestampFrequency)( + SDL_GPURenderer *driverData + ); + SDL_GPUQueryPool *(*CreateQueryPool)( SDL_GPURenderer *driverData, SDL_GPUQueryPoolCreateInfo *createinfo); @@ -1240,6 +1244,7 @@ struct SDL_GPUDevice ASSIGN_DRIVER_FUNC(WaitForFences, name) \ ASSIGN_DRIVER_FUNC(QueryFence, name) \ ASSIGN_DRIVER_FUNC(ReleaseFence, name) \ + ASSIGN_DRIVER_FUNC(GetTimestampFrequency, name) \ ASSIGN_DRIVER_FUNC(CreateQueryPool, name) \ ASSIGN_DRIVER_FUNC(BeginQuery, name) \ ASSIGN_DRIVER_FUNC(EndQuery, name) \ diff --git a/src/gpu/vulkan/SDL_gpu_vulkan.c b/src/gpu/vulkan/SDL_gpu_vulkan.c index a2c00b8b57..a3915c578a 100644 --- a/src/gpu/vulkan/SDL_gpu_vulkan.c +++ b/src/gpu/vulkan/SDL_gpu_vulkan.c @@ -7123,6 +7123,12 @@ static SDL_GPUTransferBuffer *VULKAN_CreateTransferBuffer( debugName); } +static float VULKAN_GetTimestampFrequency(SDL_GPURenderer *driverData) +{ + VulkanRenderer *renderer = (VulkanRenderer *)driverData; + return renderer->physicalDeviceProperties.properties.limits.timestampPeriod; +} + static SDL_GPUQueryPool *VULKAN_CreateQueryPool( SDL_GPURenderer *driverData, SDL_GPUQueryPoolCreateInfo *createinfo) From 3441b18aa5928cf1842244008f8e77ca0db0d85b Mon Sep 17 00:00:00 2001 From: Evan Hemsley Date: Mon, 18 May 2026 17:47:48 -0700 Subject: [PATCH 4/5] Download results to transfer buffer --- include/SDL3/SDL_gpu.h | 8 ++++---- src/dynapi/SDL_dynapi.exports | 2 +- src/dynapi/SDL_dynapi.sym | 2 +- src/dynapi/SDL_dynapi_overrides.h | 2 +- 
src/dynapi/SDL_dynapi_procs.h | 2 +- src/gpu/SDL_gpu.c | 6 +++--- src/gpu/SDL_sysgpu.h | 6 +++--- src/gpu/vulkan/SDL_gpu_vulkan.c | 24 ++++++------------------ 8 files changed, 20 insertions(+), 32 deletions(-) diff --git a/include/SDL3/SDL_gpu.h b/include/SDL3/SDL_gpu.h index 320db4e54b..7ee6c86256 100644 --- a/include/SDL3/SDL_gpu.h +++ b/include/SDL3/SDL_gpu.h @@ -606,7 +606,7 @@ typedef struct SDL_GPUFence SDL_GPUFence; * \sa SDL_ReleaseGPUQueryPool * \sa SDL_BeginGPUQuery * \sa SDL_EndGPUQuery - * \sa SDL_CopyGPUQueryResultsToBuffer + * \sa SDL_DownloadGPUQueryResults * \sa SDL_GetGPUTimestampFrequency */ typedef struct SDL_GPUQueryPool SDL_GPUQueryPool; @@ -4088,12 +4088,12 @@ extern SDL_DECLSPEC void SDLCALL SDL_DownloadFromGPUBuffer( * * \since This struct is available since SDL 3.6.0. */ -extern SDL_DECLSPEC void SDLCALL SDL_CopyGPUQueryResultsToBuffer( +extern SDL_DECLSPEC void SDLCALL SDL_DownloadGPUQueryResults( SDL_GPUCopyPass *copy_pass, SDL_GPUQueryPool *pool, Uint32 first_query, Uint32 count, - SDL_GPUBufferLocation *destination); + SDL_GPUTransferBufferLocation *destination); /** * Ends the current copy pass. 
@@ -4586,7 +4586,7 @@ extern SDL_DECLSPEC float SDLCALL SDL_GetGPUTimestampFrequency(SDL_GPUDevice *de * \sa SDL_GetGPUTimestampFrequency * \sa SDL_BeginGPUQuery * \sa SDL_EndGPUQuery - * \sa SDL_CopyGPUQueryResultsToBuffer + * \sa SDL_DownloadGPUQueryResults * \sa SDL_ReleaseGPUQueryPool */ extern SDL_DECLSPEC SDL_GPUQueryPool * SDLCALL SDL_CreateGPUQueryPool( diff --git a/src/dynapi/SDL_dynapi.exports b/src/dynapi/SDL_dynapi.exports index ff366516af..87e4072ecd 100644 --- a/src/dynapi/SDL_dynapi.exports +++ b/src/dynapi/SDL_dynapi.exports @@ -1290,7 +1290,7 @@ _SDL_LoadJPG _SDL_HasSVE2 _SDL_GamepadHasCapSense _SDL_GetGamepadCapSense -_SDL_CopyGPUQueryResultsToBuffer +_SDL_DownloadGPUQueryResults _SDL_GetGPUTimestampFrequency _SDL_CreateGPUQueryPool _SDL_BeginGPUQuery diff --git a/src/dynapi/SDL_dynapi.sym b/src/dynapi/SDL_dynapi.sym index b3d1c29dec..4871a32753 100644 --- a/src/dynapi/SDL_dynapi.sym +++ b/src/dynapi/SDL_dynapi.sym @@ -1291,7 +1291,7 @@ SDL3_0.0.0 { SDL_HasSVE2; SDL_GamepadHasCapSense; SDL_GetGamepadCapSense; - SDL_CopyGPUQueryResultsToBuffer; + SDL_DownloadGPUQueryResults; SDL_GetGPUTimestampFrequency; SDL_CreateGPUQueryPool; SDL_BeginGPUQuery; diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h index d97e515a12..16435fbe87 100644 --- a/src/dynapi/SDL_dynapi_overrides.h +++ b/src/dynapi/SDL_dynapi_overrides.h @@ -1317,7 +1317,7 @@ #define SDL_HasSVE2 SDL_HasSVE2_REAL #define SDL_GamepadHasCapSense SDL_GamepadHasCapSense_REAL #define SDL_GetGamepadCapSense SDL_GetGamepadCapSense_REAL -#define SDL_CopyGPUQueryResultsToBuffer SDL_CopyGPUQueryResultsToBuffer_REAL +#define SDL_DownloadGPUQueryResults SDL_DownloadGPUQueryResults_REAL #define SDL_GetGPUTimestampFrequency SDL_GetGPUTimestampFrequency_REAL #define SDL_CreateGPUQueryPool SDL_CreateGPUQueryPool_REAL #define SDL_BeginGPUQuery SDL_BeginGPUQuery_REAL diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h index b1415eca63..c099b31c77 100644 --- 
a/src/dynapi/SDL_dynapi_procs.h +++ b/src/dynapi/SDL_dynapi_procs.h @@ -1325,7 +1325,7 @@ SDL_DYNAPI_PROC(SDL_Surface*,SDL_LoadJPG,(const char *a),(a),return) SDL_DYNAPI_PROC(bool,SDL_HasSVE2,(void),(),return) SDL_DYNAPI_PROC(bool,SDL_GamepadHasCapSense,(SDL_Gamepad *a,SDL_GamepadCapSenseType b),(a,b),return) SDL_DYNAPI_PROC(bool,SDL_GetGamepadCapSense,(SDL_Gamepad *a,SDL_GamepadCapSenseType b),(a,b),return) -SDL_DYNAPI_PROC(void,SDL_CopyGPUQueryResultsToBuffer,(SDL_GPUCopyPass *a,SDL_GPUQueryPool *b,Uint32 c,Uint32 d,SDL_GPUBufferLocation *e),(a,b,c,d,e),) +SDL_DYNAPI_PROC(void,SDL_DownloadGPUQueryResults,(SDL_GPUCopyPass *a,SDL_GPUQueryPool *b,Uint32 c,Uint32 d,SDL_GPUTransferBufferLocation *e),(a,b,c,d,e),) SDL_DYNAPI_PROC(float,SDL_GetGPUTimestampFrequency,(SDL_GPUDevice *a),(a),return) SDL_DYNAPI_PROC(SDL_GPUQueryPool*,SDL_CreateGPUQueryPool,(SDL_GPUDevice *a,SDL_GPUQueryPoolCreateInfo *b),(a,b),return) SDL_DYNAPI_PROC(void,SDL_BeginGPUQuery,(SDL_GPUCommandBuffer *a,SDL_GPUQueryPool *b,Uint32 c),(a,b,c),) diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index 914e77c29a..fa57b49d33 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -3020,12 +3020,12 @@ void SDL_DownloadFromGPUBuffer( destination); } -void SDL_CopyGPUQueryResultsToBuffer( +void SDL_DownloadGPUQueryResults( SDL_GPUCopyPass *copy_pass, SDL_GPUQueryPool *pool, Uint32 first_query, Uint32 count, - SDL_GPUBufferLocation *destination) + SDL_GPUTransferBufferLocation *destination) { CHECK_PARAM(copy_pass == NULL) { SDL_InvalidParamError("copy_pass"); @@ -3042,7 +3042,7 @@ void SDL_CopyGPUQueryResultsToBuffer( return; } - COPYPASS_DEVICE->CopyQueryResultsToBuffer( + COPYPASS_DEVICE->DownloadQueryResults( COPYPASS_COMMAND_BUFFER, pool, first_query, diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h index eba51afdb2..d5deb16666 100644 --- a/src/gpu/SDL_sysgpu.h +++ b/src/gpu/SDL_sysgpu.h @@ -995,12 +995,12 @@ struct SDL_GPUDevice Uint32 size, bool cycle); - void (*CopyQueryResultsToBuffer)( 
+ void (*DownloadQueryResults)( SDL_GPUCommandBuffer *commandBuffer, SDL_GPUQueryPool *pool, Uint32 first_query, Uint32 count, - const SDL_GPUBufferLocation *destination); + const SDL_GPUTransferBufferLocation *destination); void (*GenerateMipmaps)( SDL_GPUCommandBuffer *commandBuffer, @@ -1222,7 +1222,7 @@ struct SDL_GPUDevice ASSIGN_DRIVER_FUNC(DownloadFromBuffer, name) \ ASSIGN_DRIVER_FUNC(CopyTextureToTexture, name) \ ASSIGN_DRIVER_FUNC(CopyBufferToBuffer, name) \ - ASSIGN_DRIVER_FUNC(CopyQueryResultsToBuffer, name) \ + ASSIGN_DRIVER_FUNC(DownloadQueryResults, name) \ ASSIGN_DRIVER_FUNC(GenerateMipmaps, name) \ ASSIGN_DRIVER_FUNC(EndCopyPass, name) \ ASSIGN_DRIVER_FUNC(Blit, name) \ diff --git a/src/gpu/vulkan/SDL_gpu_vulkan.c b/src/gpu/vulkan/SDL_gpu_vulkan.c index a3915c578a..3cfbf2df84 100644 --- a/src/gpu/vulkan/SDL_gpu_vulkan.c +++ b/src/gpu/vulkan/SDL_gpu_vulkan.c @@ -9368,46 +9368,34 @@ static void VULKAN_CopyBufferToBuffer( SDL_UnlockRWLock(renderer->defragLock); } -static void VULKAN_CopyQueryResultsToBuffer( +static void VULKAN_DownloadQueryResults( SDL_GPUCommandBuffer *commandBuffer, SDL_GPUQueryPool *pool, Uint32 firstQuery, Uint32 count, - const SDL_GPUBufferLocation *destination) + const SDL_GPUTransferBufferLocation *destination) { VulkanCommandBuffer *vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer; VulkanRenderer *renderer = vulkanCommandBuffer->renderer; VulkanQueryPool *vulkanQueryPool = (VulkanQueryPool *)pool; - VulkanBufferContainer *dstContainer = (VulkanBufferContainer *)destination->buffer; + VulkanBufferContainer *dstContainer = (VulkanBufferContainer *)destination->transfer_buffer; SDL_LockRWLockForReading(renderer->defragLock); - VulkanBuffer *dstBuffer = VULKAN_INTERNAL_PrepareBufferForWrite( - renderer, - vulkanCommandBuffer, - dstContainer, - false, // TODO: should this function take a cycle param? 
- VULKAN_BUFFER_USAGE_MODE_COPY_DESTINATION); + // Note that the transfer buffer does not need a barrier, as it is synced by the client. NOTE(review): the copy below passes only VK_QUERY_RESULT_64_BIT (no VK_QUERY_RESULT_WAIT_BIT) - confirm every copied query is guaranteed to be available at this point. renderer->vkCmdCopyQueryPoolResults( vulkanCommandBuffer->commandBuffer, vulkanQueryPool->pool, firstQuery, count, - dstBuffer->buffer, + dstContainer->activeBuffer->buffer, destination->offset, 8, // Result for timing and occlusion is one 64-bit integer VK_QUERY_RESULT_64_BIT); - VULKAN_INTERNAL_BufferTransitionToDefaultUsage( - renderer, - vulkanCommandBuffer, - VULKAN_BUFFER_USAGE_MODE_COPY_DESTINATION, - dstBuffer); - VULKAN_INTERNAL_TrackQueryPool(vulkanCommandBuffer, vulkanQueryPool); - VULKAN_INTERNAL_TrackBuffer(vulkanCommandBuffer, dstBuffer); - VULKAN_INTERNAL_TrackBufferTransfer(vulkanCommandBuffer, dstBuffer); + VULKAN_INTERNAL_TrackBuffer(vulkanCommandBuffer, dstContainer->activeBuffer); SDL_UnlockRWLock(renderer->defragLock); } From d95bc52642f840e2b7cd6c2f618a775d79486780 Mon Sep 17 00:00:00 2001 From: Evan Hemsley Date: Wed, 20 May 2026 11:55:06 -0700 Subject: [PATCH 5/5] Fix validation errors --- src/gpu/vulkan/SDL_gpu_vulkan.c | 8 +++++++- src/gpu/vulkan/SDL_gpu_vulkan_vkfuncs.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/gpu/vulkan/SDL_gpu_vulkan.c b/src/gpu/vulkan/SDL_gpu_vulkan.c index 3cfbf2df84..fb1f28fab5 100644 --- a/src/gpu/vulkan/SDL_gpu_vulkan.c +++ b/src/gpu/vulkan/SDL_gpu_vulkan.c @@ -7140,7 +7140,7 @@ static SDL_GPUQueryPool *VULKAN_CreateQueryPool( vkQueryPoolCreateInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; vkQueryPoolCreateInfo.pNext = NULL; - vkQueryPoolCreateInfo.flags = VK_QUERY_POOL_CREATE_RESET_BIT_KHR; + vkQueryPoolCreateInfo.flags = 0; vkQueryPoolCreateInfo.pipelineStatistics = 0; vkQueryPoolCreateInfo.queryCount = createinfo->query_count; vkQueryPoolCreateInfo.queryType = SDLToVK_QueryType[createinfo->type]; @@ -10020,6 +10020,12 @@ static void VULKAN_BeginQuery( VulkanRenderer *renderer = vulkanCommandBuffer->renderer; VulkanQueryPool *vulkanQueryPool
= (VulkanQueryPool *)pool; + renderer->vkCmdResetQueryPool( + vulkanCommandBuffer->commandBuffer, + vulkanQueryPool->pool, + index, + 1); /* Reset this single query slot before it is written (the pool is created with flags = 0). NOTE(review): vkCmdResetQueryPool must be recorded outside a render pass - confirm BeginQuery is never reached inside one. */ + // Timestamp queries don't begin and end, we just need a distinction between // a timestamp written when preceding commands are taken and when preceding commands are finished. if (vulkanQueryPool->type == SDL_GPU_QUERY_TIMESTAMP) { diff --git a/src/gpu/vulkan/SDL_gpu_vulkan_vkfuncs.h b/src/gpu/vulkan/SDL_gpu_vulkan_vkfuncs.h index 8940316dcb..518523311f 100644 --- a/src/gpu/vulkan/SDL_gpu_vulkan_vkfuncs.h +++ b/src/gpu/vulkan/SDL_gpu_vulkan_vkfuncs.h @@ -109,6 +109,7 @@ VULKAN_DEVICE_FUNCTION(vkCmdDrawIndirect) VULKAN_DEVICE_FUNCTION(vkCmdEndRenderPass) VULKAN_DEVICE_FUNCTION(vkCmdEndQuery) VULKAN_DEVICE_FUNCTION(vkCmdPipelineBarrier) +VULKAN_DEVICE_FUNCTION(vkCmdResetQueryPool) VULKAN_DEVICE_FUNCTION(vkCmdResolveImage) VULKAN_DEVICE_FUNCTION(vkCmdSetBlendConstants) VULKAN_DEVICE_FUNCTION(vkCmdSetDepthBias)