From aa68821970bfe3b2027b52283caa943fcb6472aa Mon Sep 17 00:00:00 2001 From: Alex Tselousov Date: Tue, 17 Mar 2026 18:48:52 +0300 Subject: [PATCH] Removed busy loop from SDL_GPUFence on macOS Previously, the Metal backend implemented MetalFence as simply a busy loop on an atomic int, meaning the CPU would busy-wait for the GPU to finish, wasting power and reducing battery life. This was the only kind of CPU-GPU syncing (apart from requesting a swapchain). --- src/gpu/metal/SDL_gpu_metal.m | 61 +++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/src/gpu/metal/SDL_gpu_metal.m b/src/gpu/metal/SDL_gpu_metal.m index 1b69546c7f..c064b21f93 100644 --- a/src/gpu/metal/SDL_gpu_metal.m +++ b/src/gpu/metal/SDL_gpu_metal.m @@ -430,6 +430,7 @@ static MTLDepthClipMode SDLToMetal_DepthClipMode( // Structs typedef struct MetalRenderer MetalRenderer; +typedef struct MetalCommandBuffer MetalCommandBuffer; typedef struct MetalTexture { @@ -453,7 +454,8 @@ typedef struct MetalTextureContainer typedef struct MetalFence { - SDL_AtomicInt complete; + // can be NULL if the command buffer was recycled + MetalCommandBuffer *commandBuffer; SDL_AtomicInt referenceCount; } MetalFence; @@ -2093,7 +2095,6 @@ static Uint8 METAL_INTERNAL_CreateFence( MetalFence *fence; fence = SDL_calloc(1, sizeof(MetalFence)); - SDL_SetAtomicInt(&fence->complete, 0); SDL_SetAtomicInt(&fence->referenceCount, 0); // Add it to the available pool @@ -2136,7 +2137,7 @@ static bool METAL_INTERNAL_AcquireFence( // Associate the fence with the command buffer commandBuffer->fence = fence; - SDL_SetAtomicInt(&fence->complete, 0); // FIXME: Is this right? 
+ fence->commandBuffer = commandBuffer; (void)SDL_AtomicIncRef(&commandBuffer->fence->referenceCount); return true; @@ -3517,6 +3518,8 @@ static void METAL_INTERNAL_CleanCommandBuffer( METAL_ReleaseFence( (SDL_GPURenderer *)renderer, (SDL_GPUFence *)commandBuffer->fence); + } else { + commandBuffer->fence->commandBuffer = NULL; } // Return command buffer to pool @@ -3583,6 +3586,16 @@ static void METAL_INTERNAL_PerformPendingDestroys( } // Fences +static bool METAL_INTERNAL_IsFenceBusy( + MetalFence *fence +) { + if (!fence->commandBuffer) { + return false; // fence was detached when METAL_INTERNAL_CleanCommandBuffer cleaned up its command buffer + } + + MTLCommandBufferStatus status = fence->commandBuffer->handle.status; /* NOTE(review): METAL_Submit sets handle = nil right after commit; messaging nil yields status 0, so this may never report busy - confirm */ + return status == MTLCommandBufferStatusCommitted || status == MTLCommandBufferStatusScheduled; +} static bool METAL_WaitForFences( SDL_GPURenderer *driverData, @@ -3592,24 +3605,29 @@ static bool METAL_WaitForFences( { @autoreleasepool { MetalRenderer *renderer = (MetalRenderer *)driverData; - bool waiting; if (waitAll) { for (Uint32 i = 0; i < numFences; i += 1) { - while (!SDL_GetAtomicInt(&((MetalFence *)fences[i])->complete)) { - // Spin! 
+ MetalFence *fence = (MetalFence *)fences[i]; + if (METAL_INTERNAL_IsFenceBusy(fence)) { + [fence->commandBuffer->handle waitUntilCompleted]; } } } else { - waiting = 1; - while (waiting) { - for (Uint32 i = 0; i < numFences; i += 1) { - if (SDL_GetAtomicInt(&((MetalFence *)fences[i])->complete) > 0) { - waiting = 0; - break; - } - } + dispatch_semaphore_t semaphore = dispatch_semaphore_create(0); + for (Uint32 i = 0; i < numFences; i += 1) { + MetalFence *fence = (MetalFence *)fences[i]; + // command buffer has completed and been recycled + if(!fence->commandBuffer) + return true; + + // NOTE(review): this handler is added after METAL_Submit has committed the buffer and set handle = nil - verify Metal allows this and that the semaphore is ever signaled + [fence->commandBuffer->handle addCompletedHandler:^(id buffer) { + dispatch_semaphore_signal(semaphore); + }]; } + + dispatch_semaphore_wait(semaphore, DISPATCH_TIME_FOREVER); } METAL_INTERNAL_PerformPendingDestroys(renderer); @@ -3623,7 +3641,7 @@ static bool METAL_QueryFence( SDL_GPUFence *fence) { MetalFence *metalFence = (MetalFence *)fence; - return SDL_GetAtomicInt(&metalFence->complete) == 1; + return !METAL_INTERNAL_IsFenceBusy(metalFence); /* signaled means no longer busy, matching the old complete == 1 result */ } // Window and Swapchain Management @@ -4086,11 +4104,6 @@ static bool METAL_Submit( windowData->frameCounter = (windowData->frameCounter + 1) % renderer->allowedFramesInFlight; } - // Notify the fence when the command buffer has completed - [metalCommandBuffer->handle addCompletedHandler:^(id buffer) { - SDL_AtomicIncRef(&metalCommandBuffer->fence->complete); - }]; - // Submit the command buffer [metalCommandBuffer->handle commit]; metalCommandBuffer->handle = nil; @@ -4108,7 +4121,8 @@ static bool METAL_Submit( // Check if we can perform any cleanups for (Sint32 i = renderer->submittedCommandBufferCount - 1; i >= 0; i -= 1) { - if (SDL_GetAtomicInt(&renderer->submittedCommandBuffers[i]->fence->complete)) { + + if (!METAL_INTERNAL_IsFenceBusy(renderer->submittedCommandBuffers[i]->fence)) { METAL_INTERNAL_CleanCommandBuffer( renderer, 
renderer->submittedCommandBuffers[i], @@ -4161,9 +4175,8 @@ static bool METAL_Wait( * Sort of equivalent to vkDeviceWaitIdle. */ for (Uint32 i = 0; i < renderer->submittedCommandBufferCount; i += 1) { - while (!SDL_GetAtomicInt(&renderer->submittedCommandBuffers[i]->fence->complete)) { - // Spin! - } + SDL_GPUFence *opaqueFence = (SDL_GPUFence *)renderer->submittedCommandBuffers[i]->fence; + METAL_WaitForFences(driverData, true, &opaqueFence, 1); } SDL_LockMutex(renderer->submitLock);