From f1f4e3421b6ca2f3a229879955a008d67e7a6114 Mon Sep 17 00:00:00 2001 From: Aiden <68633820+awils27@users.noreply.github.com> Date: Tue, 12 May 2026 01:08:32 +1000 Subject: [PATCH] Frame timing --- .../videoio/RenderOutputQueue.cpp | 12 ++ .../videoio/RenderOutputQueue.h | 1 + .../videoio/VideoBackend.cpp | 37 ++++--- .../videoio/VideoBackend.h | 2 + ...PHASE_7_7_RENDER_CADENCE_PLAYOUT_DESIGN.md | 104 ++++++++++++++---- tests/RenderOutputQueueTests.cpp | 24 ++++ 6 files changed, 141 insertions(+), 39 deletions(-) diff --git a/apps/LoopThroughWithOpenGLCompositing/videoio/RenderOutputQueue.cpp b/apps/LoopThroughWithOpenGLCompositing/videoio/RenderOutputQueue.cpp index 0f9ec6f..c68660b 100644 --- a/apps/LoopThroughWithOpenGLCompositing/videoio/RenderOutputQueue.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/videoio/RenderOutputQueue.cpp @@ -47,6 +47,18 @@ bool RenderOutputQueue::TryPop(RenderOutputFrame& frame) return true; } +bool RenderOutputQueue::DropOldestFrame() +{ + std::lock_guard lock(mMutex); + if (mReadyFrames.empty()) + return false; + + ReleaseFrame(mReadyFrames.front()); + mReadyFrames.pop_front(); + ++mDroppedCount; + return true; +} + void RenderOutputQueue::Clear() { std::lock_guard lock(mMutex); diff --git a/apps/LoopThroughWithOpenGLCompositing/videoio/RenderOutputQueue.h b/apps/LoopThroughWithOpenGLCompositing/videoio/RenderOutputQueue.h index 9c2d90d..0a9109a 100644 --- a/apps/LoopThroughWithOpenGLCompositing/videoio/RenderOutputQueue.h +++ b/apps/LoopThroughWithOpenGLCompositing/videoio/RenderOutputQueue.h @@ -34,6 +34,7 @@ public: void Configure(const VideoPlayoutPolicy& policy); bool Push(RenderOutputFrame frame); bool TryPop(RenderOutputFrame& frame); + bool DropOldestFrame(); void Clear(); RenderOutputQueueMetrics GetMetrics() const; diff --git a/apps/LoopThroughWithOpenGLCompositing/videoio/VideoBackend.cpp b/apps/LoopThroughWithOpenGLCompositing/videoio/VideoBackend.cpp index 196050c..ac9b476 100644 --- 
a/apps/LoopThroughWithOpenGLCompositing/videoio/VideoBackend.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/videoio/VideoBackend.cpp @@ -359,6 +359,12 @@ void VideoBackend::StartOutputProducerWorker() if (mOutputProducerWorkerRunning) return; + const double frameBudgetMilliseconds = State().frameBudgetMilliseconds; + const auto frameDuration = frameBudgetMilliseconds > 0.0 + ? std::chrono::duration_cast( + std::chrono::duration(frameBudgetMilliseconds)) + : std::chrono::milliseconds(16); + mRenderCadenceController.Configure(frameDuration, std::chrono::steady_clock::now()); mLastOutputProductionCompletion = VideoIOCompletion(); mLastOutputProductionTime = std::chrono::steady_clock::time_point(); mOutputProducerWorkerStopping = false; @@ -433,11 +439,16 @@ void VideoBackend::OutputProducerWorkerMain() const RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics(); RecordReadyQueueDepthSample(metrics); - const OutputProductionDecision decision = mOutputProductionController.Decide(BuildOutputProductionPressure(metrics)); - if (decision.action != OutputProductionAction::Produce || decision.requestedFrames == 0) + + const auto now = std::chrono::steady_clock::now(); + RenderCadenceDecision cadenceDecision = mRenderCadenceController.Tick(now); + if (cadenceDecision.action == RenderCadenceAction::Wait) { + const auto waitDuration = (std::min)( + std::chrono::duration_cast(cadenceDecision.waitDuration), + OutputProducerWakeInterval()); std::unique_lock lock(mOutputProducerMutex); - mOutputProducerCondition.wait_for(lock, OutputProducerWakeInterval()); + mOutputProducerCondition.wait_for(lock, waitDuration); if (mOutputProducerWorkerStopping) { mOutputProducerWorkerRunning = false; @@ -454,16 +465,7 @@ void VideoBackend::OutputProducerWorkerMain() completion = mLastOutputProductionCompletion; } - const bool belowTargetDepth = metrics.depth < decision.targetReadyFrames; - const auto now = std::chrono::steady_clock::now(); - if (!belowTargetDepth && - 
mLastOutputProductionTime != std::chrono::steady_clock::time_point() && - now - mLastOutputProductionTime < OutputProducerWakeInterval()) - { - continue; - } - - const std::size_t producedFrames = ProduceReadyOutputFrames(completion, decision.requestedFrames); + const std::size_t producedFrames = ProduceReadyOutputFrames(completion, 1); if (producedFrames > 0) { mLastOutputProductionTime = std::chrono::steady_clock::now(); @@ -600,10 +602,6 @@ std::size_t VideoBackend::ProduceReadyOutputFrames(const VideoIOCompletion& comp std::size_t producedFrames = 0; while (producedFrames < maxFrames) { - const OutputProductionDecision decision = mOutputProductionController.Decide(BuildOutputProductionPressure(metrics)); - if (decision.action != OutputProductionAction::Produce) - break; - if (!RenderReadyOutputFrame(mVideoIODevice->State(), completion)) break; ++producedFrames; @@ -634,7 +632,10 @@ bool VideoBackend::RenderReadyOutputFrame(const VideoIOState& state, const Video VideoIOOutputFrame outputFrame; const auto acquireStart = std::chrono::steady_clock::now(); if (!mSystemOutputFramePool.AcquireFreeSlot(outputSlot)) - return false; + { + if (!mReadyOutputQueue.DropOldestFrame() || !mSystemOutputFramePool.AcquireFreeSlot(outputSlot)) + return false; + } outputFrame = outputSlot.frame; const auto acquireEnd = std::chrono::steady_clock::now(); diff --git a/apps/LoopThroughWithOpenGLCompositing/videoio/VideoBackend.h b/apps/LoopThroughWithOpenGLCompositing/videoio/VideoBackend.h index da7fdf7..f8066b6 100644 --- a/apps/LoopThroughWithOpenGLCompositing/videoio/VideoBackend.h +++ b/apps/LoopThroughWithOpenGLCompositing/videoio/VideoBackend.h @@ -1,6 +1,7 @@ #pragma once #include "OutputProductionController.h" +#include "RenderCadenceController.h" #include "RenderOutputQueue.h" #include "SystemOutputFramePool.h" #include "VideoBackendLifecycle.h" @@ -105,6 +106,7 @@ private: VideoBackendLifecycle mLifecycle; VideoPlayoutPolicy mPlayoutPolicy; OutputProductionController 
mOutputProductionController; + RenderCadenceController mRenderCadenceController; RenderOutputQueue mReadyOutputQueue; SystemOutputFramePool mSystemOutputFramePool; std::unique_ptr mVideoIODevice; diff --git a/docs/PHASE_7_7_RENDER_CADENCE_PLAYOUT_DESIGN.md b/docs/PHASE_7_7_RENDER_CADENCE_PLAYOUT_DESIGN.md index ac37b76..88a84b8 100644 --- a/docs/PHASE_7_7_RENDER_CADENCE_PLAYOUT_DESIGN.md +++ b/docs/PHASE_7_7_RENDER_CADENCE_PLAYOUT_DESIGN.md @@ -2,7 +2,15 @@ ## Status -Proposed. +In progress. + +Implemented so far: + +- real DeckLink buffered-frame telemetry is exposed separately from synthetic scheduler lead +- pure `RenderCadenceController` exists with non-GL tests +- `SystemOutputFramePool` now exposes the Phase 7.7 state vocabulary: `Free`, `Rendering`, `Completed`, `Scheduled` +- the output producer now uses `RenderCadenceController` to render one output frame per cadence tick +- DeckLink scheduling remains a separate top-up pass capped by the configured preroll target Phase 7.5 and 7.6 proved useful pieces individually: @@ -38,6 +46,16 @@ DeckLink playout scheduler The system-memory frame buffer becomes the contract between render timing and device timing. +Core principle: + +- The render cadence should be stable and boring. +- If the selected output mode is 59.94 fps, the render producer should attempt to render at 59.94 fps. +- It should not speed up just because the DeckLink buffer is empty. +- It should not slow down because DeckLink is full or because completed frames have not drained. +- Completed-but-unscheduled frames are a latest-N cache. Old completed frames may be dropped/recycled to keep rendering at cadence. +- Scheduled frames are protected until DeckLink completes them. +- The only normal reason for the render cadence to deviate is that rendering/GPU work itself overruns the frame budget. + ## Non-Goals - Do not hide failure by repeating frames as the primary strategy. 
@@ -64,6 +82,14 @@ That means the system can be full and still look wrong, because "full" is not ti ### Target Shape ```text +Startup / warmup + render cadence starts first + render thread produces warmup frames at the selected cadence + completed system-memory queue reaches warmup target + DeckLink preroll is scheduled from completed frames + DeckLink playback starts with a filled buffer + +Steady state RenderCadenceController owns output frame tick: frame 0, 1, 2... owns render target time @@ -73,7 +99,8 @@ RenderCadenceController PlayoutFrameStore owns free / rendering / completed / scheduled slots tracks frame number, render time, completion time, and schedule state - exposes completed frames to DeckLink scheduler + exposes latest completed frames to DeckLink scheduler + may drop/recycle oldest unscheduled completed frames when render cadence needs space DeckLinkPlayoutScheduler owns DeckLink schedule time @@ -111,14 +138,28 @@ Rules: - If the render thread is early, it waits/yields. - If it is slightly late, it renders the next frame immediately and records lateness. -- If it is badly late, policy may skip render ticks before rendering the newest frame. -- Skipping render ticks is a render-cadence decision, not a DeckLink stream-time jump. +- If it is badly late because render/GPU work overran the frame budget, policy may skip render ticks before rendering the newest frame. +- Skipping render ticks is an overrun policy, not a buffer-fill strategy. - DeckLink schedule time should remain continuous unless a deliberate device recovery policy says otherwise. +Non-rule: + +- The render producer must not render faster than the selected cadence to refill DeckLink. +- DeckLink should start only after warmup/preroll has filled enough completed frames. +- If the DeckLink buffer drains in steady state, that is a real timing failure to measure, not a signal for the render thread to sprint. + ## Buffer Model Use a fixed system-memory slot pool. 
+The completed portion of the pool is not a strict consume-before-render queue. It is a latest-N rendered-frame cache: + +- render cadence writes one frame per selected output tick +- if completed-but-unscheduled frames are full, the oldest completed frame is disposable +- DeckLink scheduling consumes from the completed cache when it needs frames +- frames already scheduled to DeckLink are never recycled until completion +- if all slots are scheduled/in flight, cadence may miss because there is genuinely no safe system-memory target + Suggested starting values: - completed-frame target: 2-4 frames @@ -266,14 +307,14 @@ Before more scheduling changes, measure the real device buffer. Deliverables: -- call DeckLink `GetBufferedVideoFrameCount()` after schedule/completion where available -- expose `actualDeckLinkBufferedFrames` -- keep `scheduledLeadFrames` but label it synthetic/internal -- record schedule-call duration and failures +- [x] call DeckLink `GetBufferedVideoFrameCount()` after schedule/completion where available +- [x] expose `actualDeckLinkBufferedFrames` +- [x] keep `scheduledLeadFrames` but label it synthetic/internal +- [x] record schedule-call duration and failures Exit criteria: -- runtime telemetry distinguishes app completed queue, system scheduled slots, synthetic lead, and actual DeckLink buffer depth +- [x] runtime telemetry distinguishes app completed queue, system scheduled slots, synthetic lead, and actual DeckLink buffer depth ### Step 2: Rename Existing Queues To Match Their Roles @@ -295,17 +336,17 @@ Add a pure timing helper first. 
Responsibilities: -- compute next render tick -- track frame duration -- report early/late/drift -- decide whether to render, wait, or skip render ticks +- [x] compute next render tick +- [x] track frame duration +- [x] report early/late/drift +- [x] decide whether to render, wait, or skip render ticks Tests: -- exact cadence advances -- late ticks are measured -- large lateness can skip according to policy -- no dependency on GL or DeckLink +- [x] exact cadence advances +- [x] late ticks are measured +- [x] large lateness can skip according to policy +- [x] no dependency on GL or DeckLink ### Step 4: Move Output Production To Cadence Ticks @@ -313,15 +354,36 @@ Replace queue-pressure-only production with cadence-driven production. Initial behavior: -- render at selected output cadence -- produce into system-memory slots -- publish completed frames -- pause when completed queue is at max depth +- [x] render at selected output cadence +- [x] produce into system-memory slots +- [x] publish completed frames +- [x] recycle/drop oldest unscheduled completed frames when cadence needs a slot +- [ ] only wait when no safe slot remains because every slot is scheduled/in flight Exit criteria: - output rendering continues without DeckLink completions - output rendering does not schedule DeckLink directly +- completed-frame buffering behaves as latest-N, not consume-before-render + +### Step 4a: Add Warmup Before DeckLink Playback + +DeckLink output should not start consuming before the render cadence has prepared an initial cushion. 
+ +Initial behavior: + +- configure DeckLink output without starting scheduled playback +- start the render cadence producer +- render warmup frames at the selected cadence, not faster +- wait until completed-frame depth reaches `targetWarmupFrames` +- schedule those completed frames as DeckLink preroll +- call `StartScheduledPlayback()` + +Exit criteria: + +- startup does not require the render producer to catch up by rendering faster than cadence +- DeckLink begins playback with a real completed-frame buffer +- if warmup cannot fill within a bounded timeout, startup enters degraded state with telemetry ### Step 5: Make DeckLink Scheduler A Separate Top-Up Loop diff --git a/tests/RenderOutputQueueTests.cpp b/tests/RenderOutputQueueTests.cpp index f3c258d..698f647 100644 --- a/tests/RenderOutputQueueTests.cpp +++ b/tests/RenderOutputQueueTests.cpp @@ -94,6 +94,29 @@ void TestOverflowReleasesDroppedFrame() Expect(gReleasedFrames == 1, "pop transfers ownership without releasing"); } +void TestDropOldestFrameReleasesFrame() +{ + gReleasedFrames = 0; + VideoPlayoutPolicy policy; + policy.maxReadyFrames = 2; + RenderOutputQueue queue(policy); + + queue.Push(MakeOwnedFrame(1)); + queue.Push(MakeOwnedFrame(2)); + + Expect(queue.DropOldestFrame(), "oldest ready frame can be explicitly dropped"); + Expect(gReleasedFrames == 1, "explicit drop releases oldest frame"); + + RenderOutputQueueMetrics metrics = queue.GetMetrics(); + Expect(metrics.depth == 1, "explicit drop reduces queue depth"); + Expect(metrics.droppedCount == 1, "explicit drop increments dropped count"); + + RenderOutputFrame frame; + Expect(queue.TryPop(frame), "newest frame remains after explicit drop"); + Expect(frame.frameIndex == 2, "explicit drop keeps newest frame"); + Expect(!queue.DropOldestFrame(), "empty queue cannot drop a frame"); +} + void TestUnderrunIsCounted() { RenderOutputQueue queue; @@ -169,6 +192,7 @@ int main() TestQueuePreservesOrdering(); TestBoundedQueueDropsOldestFrame(); 
TestOverflowReleasesDroppedFrame(); + TestDropOldestFrameReleasesFrame(); TestUnderrunIsCounted(); TestConfigureShrinksDepthToNewCapacity(); TestConfigureReleasesTrimmedFrames();