V2 working
All checks were successful
CI / React UI Build (push) Successful in 11s
CI / Native Windows Build And Tests (push) Successful in 2m54s
CI / Windows Release Package (push) Successful in 3m14s

This commit is contained in:
Aiden
2026-05-12 01:59:02 +10:00
parent 2531d871e8
commit e0ca548ef5
32 changed files with 3492 additions and 0 deletions

17
.vscode/launch.json vendored
View File

@@ -83,6 +83,23 @@
"moduleLoad": true
},
"preLaunchTask": "Build DeckLinkRenderCadenceProbe Debug x64"
},
{
"name": "Debug RenderCadenceCompositor",
"type": "cppvsdbg",
"request": "launch",
"program": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug\\RenderCadenceCompositor.exe",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
"environment": [],
"console": "externalTerminal",
"symbolSearchPath": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
"requireExactSource": true,
"logging": {
"moduleLoad": true
},
"preLaunchTask": "Build RenderCadenceCompositor Debug x64"
}
]
}

16
.vscode/tasks.json vendored
View File

@@ -52,6 +52,22 @@
"group": "build",
"problemMatcher": "$msCompile"
},
{
"label": "Build RenderCadenceCompositor Debug x64",
"type": "process",
"command": "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\Common7\\IDE\\CommonExtensions\\Microsoft\\CMake\\CMake\\bin\\cmake.exe",
"args": [
"--build",
"${workspaceFolder}\\build\\vs2022-x64-debug",
"--config",
"Debug",
"--target",
"RenderCadenceCompositor",
"--parallel"
],
"group": "build",
"problemMatcher": "$msCompile"
},
{
"label": "Clean LoopThroughWithOpenGLCompositing Debug x64",
"type": "process",

View File

@@ -273,6 +273,82 @@ if(MSVC)
target_compile_options(DeckLinkRenderCadenceProbe PRIVATE /W3)
endif()
# RenderCadenceCompositor executable.
# Combines shared DeckLink / GL-extension / video I/O sources from ${APP_DIR}
# with the app's own modules under apps/RenderCadenceCompositor.
set(RENDER_CADENCE_APP_DIR "${CMAKE_CURRENT_SOURCE_DIR}/apps/RenderCadenceCompositor")
set(RENDER_CADENCE_APP_SOURCES
# Shared sources taken from ${APP_DIR}.
"${APP_DIR}/videoio/decklink/DeckLinkAPI_i.c"
"${APP_DIR}/videoio/decklink/DeckLinkDisplayMode.cpp"
"${APP_DIR}/videoio/decklink/DeckLinkDisplayMode.h"
"${APP_DIR}/videoio/decklink/DeckLinkFrameTransfer.cpp"
"${APP_DIR}/videoio/decklink/DeckLinkFrameTransfer.h"
"${APP_DIR}/videoio/decklink/DeckLinkSession.cpp"
"${APP_DIR}/videoio/decklink/DeckLinkSession.h"
"${APP_DIR}/videoio/decklink/DeckLinkVideoIOFormat.cpp"
"${APP_DIR}/videoio/decklink/DeckLinkVideoIOFormat.h"
"${APP_DIR}/gl/renderer/GLExtensions.cpp"
"${APP_DIR}/gl/renderer/GLExtensions.h"
"${APP_DIR}/videoio/VideoIOFormat.cpp"
"${APP_DIR}/videoio/VideoIOFormat.h"
"${APP_DIR}/videoio/VideoIOTypes.h"
"${APP_DIR}/videoio/VideoPlayoutPolicy.h"
"${APP_DIR}/videoio/VideoPlayoutScheduler.cpp"
"${APP_DIR}/videoio/VideoPlayoutScheduler.h"
# App-specific sources (entry point, app, frames, platform, render, telemetry, video).
"${RENDER_CADENCE_APP_DIR}/RenderCadenceCompositor.cpp"
"${RENDER_CADENCE_APP_DIR}/app/AppConfig.cpp"
"${RENDER_CADENCE_APP_DIR}/app/AppConfig.h"
"${RENDER_CADENCE_APP_DIR}/app/RenderCadenceApp.h"
"${RENDER_CADENCE_APP_DIR}/frames/SystemFrameExchange.cpp"
"${RENDER_CADENCE_APP_DIR}/frames/SystemFrameExchange.h"
"${RENDER_CADENCE_APP_DIR}/frames/SystemFrameTypes.h"
"${RENDER_CADENCE_APP_DIR}/platform/HiddenGlWindow.cpp"
"${RENDER_CADENCE_APP_DIR}/platform/HiddenGlWindow.h"
"${RENDER_CADENCE_APP_DIR}/render/Bgra8ReadbackPipeline.cpp"
"${RENDER_CADENCE_APP_DIR}/render/Bgra8ReadbackPipeline.h"
"${RENDER_CADENCE_APP_DIR}/render/PboReadbackRing.cpp"
"${RENDER_CADENCE_APP_DIR}/render/PboReadbackRing.h"
"${RENDER_CADENCE_APP_DIR}/render/RenderCadenceClock.cpp"
"${RENDER_CADENCE_APP_DIR}/render/RenderCadenceClock.h"
"${RENDER_CADENCE_APP_DIR}/render/RenderThread.cpp"
"${RENDER_CADENCE_APP_DIR}/render/RenderThread.h"
"${RENDER_CADENCE_APP_DIR}/render/SimpleMotionRenderer.cpp"
"${RENDER_CADENCE_APP_DIR}/render/SimpleMotionRenderer.h"
"${RENDER_CADENCE_APP_DIR}/telemetry/CadenceTelemetry.h"
"${RENDER_CADENCE_APP_DIR}/telemetry/TelemetryPrinter.h"
"${RENDER_CADENCE_APP_DIR}/video/DeckLinkOutput.cpp"
"${RENDER_CADENCE_APP_DIR}/video/DeckLinkOutput.h"
"${RENDER_CADENCE_APP_DIR}/video/DeckLinkOutputThread.h"
)
add_executable(RenderCadenceCompositor ${RENDER_CADENCE_APP_SOURCES})
# Both the shared directories and every app module directory are on the include
# path, so sources can include headers by bare name or by module-relative path.
target_include_directories(RenderCadenceCompositor PRIVATE
"${APP_DIR}"
"${APP_DIR}/gl/renderer"
"${APP_DIR}/videoio"
"${APP_DIR}/videoio/decklink"
"${RENDER_CADENCE_APP_DIR}"
"${RENDER_CADENCE_APP_DIR}/app"
"${RENDER_CADENCE_APP_DIR}/frames"
"${RENDER_CADENCE_APP_DIR}/platform"
"${RENDER_CADENCE_APP_DIR}/render"
"${RENDER_CADENCE_APP_DIR}/telemetry"
"${RENDER_CADENCE_APP_DIR}/video"
)
# opengl32 for the WGL/OpenGL entry points, Ole32 for the COM runtime.
target_link_libraries(RenderCadenceCompositor PRIVATE
opengl32
Ole32
)
# UNICODE/_UNICODE make the generic Win32 macros resolve to their wide (W) variants.
target_compile_definitions(RenderCadenceCompositor PRIVATE
_UNICODE
UNICODE
)
if(MSVC)
target_compile_options(RenderCadenceCompositor PRIVATE /W3)
endif()
add_executable(RuntimeJsonTests
"${APP_DIR}/runtime/support/RuntimeJson.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/tests/RuntimeJsonTests.cpp"
@@ -642,6 +718,23 @@ endif()
add_test(NAME RenderCadenceControllerTests COMMAND RenderCadenceControllerTests)
# Frame-exchange unit tests: built only from VideoIOFormat.cpp,
# SystemFrameExchange.cpp and the test source; links no OpenGL or DeckLink code.
add_executable(RenderCadenceCompositorFrameExchangeTests
"${APP_DIR}/videoio/VideoIOFormat.cpp"
"${RENDER_CADENCE_APP_DIR}/frames/SystemFrameExchange.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/tests/RenderCadenceCompositorFrameExchangeTests.cpp"
)
target_include_directories(RenderCadenceCompositorFrameExchangeTests PRIVATE
"${APP_DIR}/videoio"
"${RENDER_CADENCE_APP_DIR}/frames"
)
if(MSVC)
target_compile_options(RenderCadenceCompositorFrameExchangeTests PRIVATE /W3)
endif()
# Registered with CTest under the same name as the executable.
add_test(NAME RenderCadenceCompositorFrameExchangeTests COMMAND RenderCadenceCompositorFrameExchangeTests)
add_executable(SystemOutputFramePoolTests
"${APP_DIR}/videoio/SystemOutputFramePool.cpp"
"${APP_DIR}/videoio/VideoIOFormat.cpp"

View File

@@ -0,0 +1,157 @@
# RenderCadenceCompositor
This app is the modular version of the working DeckLink render-cadence probe.
Its job is to prove the production-facing foundation before the current compositor's shader/runtime/control features are ported over.
## Architecture
```text
RenderThread
owns a hidden OpenGL context
renders simple BGRA8 motion at selected cadence
queues async PBO readback
publishes completed frames into SystemFrameExchange
SystemFrameExchange
owns Free / Rendering / Completed / Scheduled slots
drops old completed unscheduled frames when render needs space
protects scheduled frames until DeckLink completion
DeckLinkOutputThread
consumes completed system-memory frames
schedules them into DeckLink up to target depth
never renders
```
Startup warms up real rendered frames before DeckLink scheduled playback starts.
## Current Scope
Included now:
- output-only DeckLink
- hidden render-thread-owned OpenGL context
- simple smooth-motion renderer
- BGRA8-only output
- async PBO readback
- latest-N system-memory frame exchange
- rendered-frame warmup
- compact telemetry
- non-GL frame-exchange tests
Intentionally not included yet:
- DeckLink input
- shader package rendering
- runtime state
- OSC/API control
- preview
- screenshots
- persistence
Those features should be ported only after the cadence spine is stable.
## Build
```powershell
cmake --build --preset build-debug --target RenderCadenceCompositor -- /m:1
```
The executable is:
```text
build\vs2022-x64-debug\Debug\RenderCadenceCompositor.exe
```
## Run
Run from VS Code with:
```text
Debug RenderCadenceCompositor
```
Or from a terminal:
```powershell
build\vs2022-x64-debug\Debug\RenderCadenceCompositor.exe
```
Press Enter to stop.
## Expected Telemetry
The app prints one line per second:
```text
renderFps=59.9 scheduleFps=59.9 free=7 completed=1 scheduled=4 completedPollMisses=0 scheduleFailures=0 completions=119 late=0 dropped=0 decklinkBuffered=4 scheduleCallMs=0.0
```
Healthy first-run signs:
- visible DeckLink output is smooth
- `renderFps` is close to the selected cadence
- `scheduleFps` is close to the selected cadence after warmup
- `scheduled` stays near 4
- `decklinkBuffered` stays near 4 when available
- `late` and `dropped` do not increase continuously
- `scheduleFailures` does not increase
`completedPollMisses` means the DeckLink scheduling thread woke up before a completed frame was available. It is not a DeckLink playout underrun by itself. Treat it as healthy polling noise when `scheduled`, `decklinkBuffered`, `late`, `dropped`, and `scheduleFailures` remain stable.
## Baseline Result
Date: 2026-05-12
User-visible result:
- output was smooth
- DeckLink held a 4-frame buffer
Representative telemetry:
```text
renderFps=59.9 scheduleFps=59.9 free=8 completed=0 scheduled=4 completedPollMisses=30 scheduleFailures=0 completions=720 late=0 dropped=0 decklinkBuffered=4 scheduleCallMs=1.2
renderFps=59.8 scheduleFps=59.8 free=7 completed=1 scheduled=4 completedPollMisses=36 scheduleFailures=0 completions=1080 late=0 dropped=0 decklinkBuffered=4 scheduleCallMs=4.7
renderFps=59.9 scheduleFps=59.9 free=7 completed=1 scheduled=4 completedPollMisses=86 scheduleFailures=0 completions=1381 late=0 dropped=0 decklinkBuffered=4 scheduleCallMs=2.1
```
How to read this telemetry:
- render cadence and DeckLink schedule cadence both held roughly 60 fps
- app scheduled depth stayed at 4
- actual DeckLink buffered depth stayed at 4
- no late frames, dropped frames, or schedule failures were observed
- completed poll misses were benign because playout remained fully fed
## Tests
```powershell
cmake --build --preset build-debug --target RenderCadenceCompositorFrameExchangeTests -- /m:1
ctest --test-dir build\vs2022-x64-debug -C Debug -R RenderCadenceCompositorFrameExchangeTests --output-on-failure
```
## Relationship To The Probe
`apps/DeckLinkRenderCadenceProbe` proved the timing model in one compact file.
This app keeps the same core behavior but splits it into modules that can grow:
- `frames/`: system-memory handoff
- `platform/`: Win32 hidden-window OpenGL context support (COM initialization currently lives in `RenderCadenceCompositor.cpp`)
- `render/`: cadence, simple rendering, PBO readback
- `video/`: DeckLink output wrapper and scheduling thread
- `telemetry/`: cadence telemetry
- `app/`: startup/shutdown orchestration
## Next Porting Steps
Only after this app matches the probe's smooth output:
1. replace `SimpleMotionRenderer` with a render-scene interface
2. port shader package rendering
3. port runtime snapshots/live state
4. add control services
5. add preview/screenshot from system-memory frames
6. add DeckLink input as a CPU latest-frame mailbox

View File

@@ -0,0 +1,83 @@
#include "app/AppConfig.h"
#include "app/RenderCadenceApp.h"
#include "frames/SystemFrameExchange.h"
#include "render/RenderThread.h"
#include "VideoIOFormat.h"
#include <windows.h>
#include <iostream>
#include <string>
namespace
{
class ComInitGuard
{
public:
~ComInitGuard()
{
if (mInitialized)
CoUninitialize();
}
bool Initialize()
{
const HRESULT result = CoInitialize(nullptr);
mInitialized = SUCCEEDED(result);
mResult = result;
return mInitialized;
}
HRESULT Result() const { return mResult; }
private:
bool mInitialized = false;
HRESULT mResult = S_OK;
};
}
int main()
{
ComInitGuard com;
if (!com.Initialize())
{
std::cerr << "COM initialization failed: 0x" << std::hex << com.Result() << std::dec << "\n";
return 1;
}
std::cout << "RenderCadenceCompositor\n"
<< " Starts render cadence, system-memory exchange, DeckLink scheduled output, and telemetry.\n"
<< " Press Enter to stop.\n";
SystemFrameExchangeConfig frameExchangeConfig;
frameExchangeConfig.width = 1920;
frameExchangeConfig.height = 1080;
frameExchangeConfig.pixelFormat = VideoIOPixelFormat::Bgra8;
frameExchangeConfig.rowBytes = VideoIORowBytes(frameExchangeConfig.pixelFormat, frameExchangeConfig.width);
frameExchangeConfig.capacity = 12;
SystemFrameExchange frameExchange(frameExchangeConfig);
RenderThread::Config renderConfig;
renderConfig.width = frameExchangeConfig.width;
renderConfig.height = frameExchangeConfig.height;
renderConfig.frameDurationMilliseconds = 1000.0 / 59.94;
renderConfig.pboDepth = 6;
RenderThread renderThread(frameExchange, renderConfig);
RenderCadenceCompositor::AppConfig appConfig = RenderCadenceCompositor::DefaultAppConfig();
RenderCadenceCompositor::RenderCadenceApp<RenderThread, SystemFrameExchange> app(renderThread, frameExchange, appConfig);
std::string error;
if (!app.Start(error))
{
std::cerr << "RenderCadenceCompositor start failed: " << error << "\n";
return 1;
}
std::string line;
std::getline(std::cin, line);
app.Stop();
return 0;
}

View File

@@ -0,0 +1,18 @@
#include "AppConfig.h"
namespace RenderCadenceCompositor
{
/// Builds the default application configuration.
///
/// Keying and alpha output are disabled, the output thread targets 4 buffered
/// frames, telemetry reports every second, warmup waits for 4 completed frames,
/// and both the warmup and preroll waits time out after 3 seconds (preroll is
/// polled every 2 ms).
AppConfig DefaultAppConfig()
{
    const std::chrono::seconds startupTimeout(3);

    AppConfig defaults;

    // DeckLink output: plain fill, no external keyer, no alpha requirement.
    defaults.deckLink.externalKeyingEnabled = false;
    defaults.deckLink.outputAlphaRequired = false;

    // Scheduling depth and telemetry cadence.
    defaults.outputThread.targetBufferedFrames = 4;
    defaults.telemetry.interval = std::chrono::seconds(1);

    // Startup gating: rendered-frame warmup, then DeckLink preroll.
    defaults.warmupCompletedFrames = 4;
    defaults.warmupTimeout = startupTimeout;
    defaults.prerollTimeout = startupTimeout;
    defaults.prerollPoll = std::chrono::milliseconds(2);

    return defaults;
}
}

View File

@@ -0,0 +1,24 @@
#pragma once
#include "../telemetry/TelemetryPrinter.h"
#include "../video/DeckLinkOutput.h"
#include "../video/DeckLinkOutputThread.h"
#include <chrono>
#include <cstddef>
namespace RenderCadenceCompositor
{
// Aggregated settings consumed by RenderCadenceApp.
struct AppConfig
{
// Settings handed to DeckLinkOutput::Initialize.
DeckLinkOutputConfig deckLink;
// Settings for the DeckLink scheduling thread (includes targetBufferedFrames).
DeckLinkOutputThreadConfig outputThread;
// Settings for the telemetry printer (includes the reporting interval).
TelemetryPrinterConfig telemetry;
// Number of completed frames RenderCadenceApp::Start waits for before starting the output thread.
std::size_t warmupCompletedFrames = 4;
// Maximum time to wait for the warmup frames.
std::chrono::milliseconds warmupTimeout = std::chrono::seconds(3);
// Maximum time to wait for the scheduled-frame count to reach targetBufferedFrames.
std::chrono::milliseconds prerollTimeout = std::chrono::seconds(3);
// Sleep between scheduled-frame count checks during preroll.
std::chrono::milliseconds prerollPoll = std::chrono::milliseconds(2);
};
// Returns the defaults used when no configuration is supplied (defined in AppConfig.cpp).
AppConfig DefaultAppConfig();
}

View File

@@ -0,0 +1,148 @@
#pragma once
#include "AppConfig.h"
#include "../telemetry/TelemetryPrinter.h"
#include "../video/DeckLinkOutput.h"
#include "../video/DeckLinkOutputThread.h"
#include <chrono>
#include <string>
#include <thread>
#include <type_traits>
namespace RenderCadenceCompositor
{
// Helpers that let RenderCadenceApp start a render thread whose Start() has any
// of three shapes: bool Start(std::string&), bool Start(), or Start() returning
// a non-bool type. Callers invoke StartRenderThread(thread, error, 0).
namespace detail
{
// Selected when renderThread.Start(error) is well-formed. The literal 0 passed
// by callers is an int, so this overload is the better match whenever it is viable.
template <typename RenderThread>
auto StartRenderThread(RenderThread& renderThread, std::string& error, int) -> decltype(renderThread.Start(error), bool())
{
return renderThread.Start(error);
}
// Start() returns bool: forward its result.
template <typename RenderThread>
bool StartRenderThreadWithoutError(RenderThread& renderThread, std::true_type)
{
return renderThread.Start();
}
// Start() returns something other than bool: call it and report success.
template <typename RenderThread>
bool StartRenderThreadWithoutError(RenderThread& renderThread, std::false_type)
{
renderThread.Start();
return true;
}
// Fallback (int -> long conversion) used when Start(error) is not available but
// Start() is; the error string is left untouched.
template <typename RenderThread>
auto StartRenderThread(RenderThread& renderThread, std::string&, long) -> decltype(renderThread.Start(), bool())
{
return StartRenderThreadWithoutError(renderThread, std::is_same<decltype(renderThread.Start()), bool>());
}
}
/// Orchestrates startup and shutdown of the render thread, the system-memory
/// frame exchange, the DeckLink output with its scheduling thread, and telemetry.
///
/// The render thread and frame exchange are borrowed by reference and must
/// outlive this object; the DeckLink output, output thread and telemetry
/// printer are owned.
///
/// @tparam RenderThread        Type providing Stop() and one of the Start()
///                             shapes accepted by detail::StartRenderThread.
/// @tparam SystemFrameExchange Type providing ReleaseScheduledByBytes(),
///                             WaitForCompletedDepth() and Metrics().
template <typename RenderThread, typename SystemFrameExchange>
class RenderCadenceApp
{
public:
    /// @param renderThread  Render thread to start/stop (not owned).
    /// @param frameExchange Frame exchange shared with the render thread (not owned).
    /// @param config        Application settings; copied.
    RenderCadenceApp(RenderThread& renderThread, SystemFrameExchange& frameExchange, AppConfig config = DefaultAppConfig()) :
        mRenderThread(renderThread),
        mFrameExchange(frameExchange),
        mConfig(config),
        mOutputThread(mOutput, mFrameExchange, mConfig.outputThread),
        mTelemetry(mConfig.telemetry)
    {
    }

    RenderCadenceApp(const RenderCadenceApp&) = delete;
    RenderCadenceApp& operator=(const RenderCadenceApp&) = delete;

    ~RenderCadenceApp()
    {
        Stop();
    }

    /// Runs the startup sequence: initialize DeckLink output, start the render
    /// thread, wait for warmup frames, start the output thread, wait for
    /// preroll, start scheduled playback, then start telemetry.
    ///
    /// Every failure path calls Stop() before returning.
    /// @param error Receives a description when a step that reports one fails.
    /// @return true when every step succeeded.
    bool Start(std::string& error)
    {
        if (!mOutput.Initialize(
                mConfig.deckLink,
                [this](const VideoIOCompletion& completion) {
                    // Hand the slot back to the exchange once DeckLink is done with it.
                    mFrameExchange.ReleaseScheduledByBytes(completion.outputFrameBuffer);
                },
                error))
        {
            // Same cleanup as every other failure path, so anything Initialize()
            // acquired is released now rather than at destruction.
            Stop();
            return false;
        }

        if (!detail::StartRenderThread(mRenderThread, error, 0))
        {
            Stop();
            return false;
        }

        // Playback is only started once real rendered frames are available.
        if (!mFrameExchange.WaitForCompletedDepth(mConfig.warmupCompletedFrames, mConfig.warmupTimeout))
        {
            error = "Timed out waiting for rendered warmup frames.";
            Stop();
            return false;
        }

        if (!mOutputThread.Start())
        {
            error = "DeckLink output thread failed to start.";
            Stop();
            return false;
        }

        if (!WaitForPreroll())
        {
            error = "Timed out waiting for DeckLink preroll frames.";
            Stop();
            return false;
        }

        if (!mOutput.StartScheduledPlayback(error))
        {
            Stop();
            return false;
        }

        mTelemetry.Start(mFrameExchange, mOutput, mOutputThread);
        mStarted = true;
        return true;
    }

    /// Stops telemetry, the output thread, DeckLink output and the render
    /// thread (in that order), then releases DeckLink resources. Also invoked
    /// by the destructor and by Start() on failure.
    void Stop()
    {
        mTelemetry.Stop();
        mOutputThread.Stop();
        mOutput.Stop();
        mRenderThread.Stop();
        mOutput.ReleaseResources();
        mStarted = false;
    }

    /// @return true after a successful Start() until the next Stop().
    bool Started() const { return mStarted; }

    /// Read-only access to the owned DeckLink output.
    const DeckLinkOutput& Output() const { return mOutput; }

private:
    /// Polls the exchange until the scheduled-frame count reaches the output
    /// thread's target depth or the preroll timeout elapses.
    /// @return true when the target depth was observed.
    bool WaitForPreroll() const
    {
        const auto prerollReached = [this]() {
            return mFrameExchange.Metrics().scheduledCount >= mConfig.outputThread.targetBufferedFrames;
        };

        const auto deadline = std::chrono::steady_clock::now() + mConfig.prerollTimeout;
        while (std::chrono::steady_clock::now() < deadline)
        {
            if (prerollReached())
                return true;
            std::this_thread::sleep_for(mConfig.prerollPoll);
        }

        // One last look after the deadline: the depth may have been reached
        // during the final sleep, and a zero timeout still gets a single check.
        return prerollReached();
    }

    RenderThread& mRenderThread;
    SystemFrameExchange& mFrameExchange;
    AppConfig mConfig;
    // Declared before mOutputThread, which is constructed with a reference to it.
    DeckLinkOutput mOutput;
    DeckLinkOutputThread<SystemFrameExchange> mOutputThread;
    TelemetryPrinter mTelemetry;
    bool mStarted = false;
};
}

View File

@@ -0,0 +1,245 @@
#include "SystemFrameExchange.h"
namespace
{
// Returns a copy of `config` in which a zero rowBytes is replaced by
// VideoIORowBytes(pixelFormat, width); a non-zero rowBytes is kept as given.
SystemFrameExchangeConfig NormalizeConfig(SystemFrameExchangeConfig config)
{
    const bool strideMissing = (config.rowBytes == 0);
    if (strideMissing)
    {
        config.rowBytes = VideoIORowBytes(config.pixelFormat, config.width);
    }
    return config;
}
}
// Constructs the exchange and immediately applies `config` via Configure().
SystemFrameExchange::SystemFrameExchange(const SystemFrameExchangeConfig& config)
{
Configure(config);
}
void SystemFrameExchange::Configure(const SystemFrameExchangeConfig& config)
{
std::lock_guard<std::mutex> lock(mMutex);
mConfig = NormalizeConfig(config);
mCompletedIndices.clear();
mSlots.clear();
mSlots.resize(mConfig.capacity);
const std::size_t byteCount = FrameByteCount();
for (Slot& slot : mSlots)
{
slot.bytes.resize(byteCount);
slot.state = SystemFrameSlotState::Free;
slot.frameIndex = 0;
++slot.generation;
}
mCounters = SystemFrameExchangeMetrics();
mCondition.notify_all();
}
// Returns a copy of the current (normalized) configuration, taken under the mutex.
SystemFrameExchangeConfig SystemFrameExchange::Config() const
{
std::lock_guard<std::mutex> lock(mMutex);
return mConfig;
}
// Claims a slot for rendering and describes it in `frame`.
//
// A Free slot is preferred. When none exists, the oldest Completed
// (unscheduled) frame is dropped and the claim is retried once. On failure
// `frame` is reset to a default SystemFrame and acquireMisses is incremented.
// Returns true when a slot was claimed.
bool SystemFrameExchange::AcquireForRender(SystemFrame& frame)
{
    std::lock_guard<std::mutex> guard(mMutex);

    bool claimed = AcquireFreeLocked(frame);
    if (!claimed)
    {
        // Make room by sacrificing the oldest completed frame, then retry.
        claimed = DropOldestCompletedLocked() && AcquireFreeLocked(frame);
    }

    if (!claimed)
    {
        frame = SystemFrame();
        ++mCounters.acquireMisses;
        return false;
    }

    ++mCounters.acquiredFrames;
    return true;
}
bool SystemFrameExchange::PublishCompleted(const SystemFrame& frame)
{
std::lock_guard<std::mutex> lock(mMutex);
if (!IsValidLocked(frame))
return false;
Slot& slot = mSlots[frame.index];
if (slot.state != SystemFrameSlotState::Rendering)
return false;
slot.state = SystemFrameSlotState::Completed;
slot.frameIndex = frame.frameIndex;
mCompletedIndices.push_back(frame.index);
++mCounters.completedFrames;
mCondition.notify_all();
return true;
}
// Takes the oldest Completed frame, moves its slot to Scheduled and describes
// it in `frame`.
//
// Queue entries whose slot is out of range or no longer Completed are
// discarded. When nothing usable is queued, `frame` is reset,
// completedPollMisses is incremented and false is returned.
bool SystemFrameExchange::ConsumeCompletedForSchedule(SystemFrame& frame)
{
    std::lock_guard<std::mutex> guard(mMutex);

    for (; !mCompletedIndices.empty();)
    {
        const std::size_t candidate = mCompletedIndices.front();
        mCompletedIndices.pop_front();

        const bool usable =
            candidate < mSlots.size() && mSlots[candidate].state == SystemFrameSlotState::Completed;
        if (usable)
        {
            mSlots[candidate].state = SystemFrameSlotState::Scheduled;
            FillFrameLocked(candidate, frame);
            ++mCounters.scheduledFrames;
            return true;
        }
    }

    frame = SystemFrame();
    ++mCounters.completedPollMisses;
    return false;
}
bool SystemFrameExchange::ReleaseScheduledByBytes(void* bytes)
{
if (bytes == nullptr)
return false;
std::lock_guard<std::mutex> lock(mMutex);
for (std::size_t index = 0; index < mSlots.size(); ++index)
{
Slot& slot = mSlots[index];
if (slot.bytes.empty() || slot.bytes.data() != bytes)
continue;
if (slot.state != SystemFrameSlotState::Scheduled)
return false;
slot.state = SystemFrameSlotState::Free;
slot.frameIndex = 0;
++slot.generation;
mCondition.notify_all();
return true;
}
return false;
}
bool SystemFrameExchange::WaitForCompletedDepth(std::size_t targetDepth, std::chrono::milliseconds timeout)
{
std::unique_lock<std::mutex> lock(mMutex);
return mCondition.wait_for(lock, timeout, [&]() {
return CompletedCountLocked() >= targetDepth;
});
}
void SystemFrameExchange::Clear()
{
std::lock_guard<std::mutex> lock(mMutex);
mCompletedIndices.clear();
for (Slot& slot : mSlots)
{
slot.state = SystemFrameSlotState::Free;
slot.frameIndex = 0;
++slot.generation;
}
mCondition.notify_all();
}
// Returns a snapshot of the counters plus the current capacity, completed-queue
// length and per-state slot counts, all taken under the mutex.
SystemFrameExchangeMetrics SystemFrameExchange::Metrics() const
{
    std::lock_guard<std::mutex> guard(mMutex);

    SystemFrameExchangeMetrics snapshot = mCounters;
    snapshot.capacity = mSlots.size();
    snapshot.completedDepth = mCompletedIndices.size();

    // Tally how many slots are in each state right now.
    for (const Slot& slot : mSlots)
    {
        if (slot.state == SystemFrameSlotState::Free)
            ++snapshot.freeCount;
        else if (slot.state == SystemFrameSlotState::Rendering)
            ++snapshot.renderingCount;
        else if (slot.state == SystemFrameSlotState::Completed)
            ++snapshot.completedCount;
        else if (slot.state == SystemFrameSlotState::Scheduled)
            ++snapshot.scheduledCount;
    }
    return snapshot;
}
// Claims the lowest-indexed Free slot: moves it to Rendering, bumps its
// generation and describes it in `frame`. Returns false when no slot is Free.
// The caller must hold mMutex.
bool SystemFrameExchange::AcquireFreeLocked(SystemFrame& frame)
{
    std::size_t freeIndex = 0;
    while (freeIndex < mSlots.size() && mSlots[freeIndex].state != SystemFrameSlotState::Free)
        ++freeIndex;

    if (freeIndex == mSlots.size())
        return false;

    Slot& claimed = mSlots[freeIndex];
    claimed.state = SystemFrameSlotState::Rendering;
    ++claimed.generation;
    FillFrameLocked(freeIndex, frame);
    return true;
}
bool SystemFrameExchange::DropOldestCompletedLocked()
{
while (!mCompletedIndices.empty())
{
const std::size_t index = mCompletedIndices.front();
mCompletedIndices.pop_front();
if (index >= mSlots.size() || mSlots[index].state != SystemFrameSlotState::Completed)
continue;
Slot& slot = mSlots[index];
slot.state = SystemFrameSlotState::Free;
slot.frameIndex = 0;
++slot.generation;
++mCounters.completedDrops;
mCondition.notify_all();
return true;
}
return false;
}
// A frame handle is valid when its index is in range and its generation equals
// the slot's current generation. The caller must hold mMutex.
bool SystemFrameExchange::IsValidLocked(const SystemFrame& frame) const
{
    if (frame.index >= mSlots.size())
        return false;
    return mSlots[frame.index].generation == frame.generation;
}
void SystemFrameExchange::FillFrameLocked(std::size_t index, SystemFrame& frame)
{
Slot& slot = mSlots[index];
frame.bytes = slot.bytes.empty() ? nullptr : slot.bytes.data();
frame.rowBytes = static_cast<long>(mConfig.rowBytes);
frame.width = mConfig.width;
frame.height = mConfig.height;
frame.pixelFormat = mConfig.pixelFormat;
frame.index = index;
frame.generation = slot.generation;
frame.frameIndex = slot.frameIndex;
}
std::size_t SystemFrameExchange::CompletedCountLocked() const
{
std::size_t count = 0;
for (const Slot& slot : mSlots)
{
if (slot.state == SystemFrameSlotState::Completed)
++count;
}
return count;
}
// Size in bytes of one frame buffer: configured rowBytes times configured
// height, computed in std::size_t.
std::size_t SystemFrameExchange::FrameByteCount() const
{
    const std::size_t stride = static_cast<std::size_t>(mConfig.rowBytes);
    const std::size_t rows = static_cast<std::size_t>(mConfig.height);
    return stride * rows;
}

View File

@@ -0,0 +1,51 @@
#pragma once
#include "SystemFrameTypes.h"
#include <chrono>
#include <condition_variable>
#include <deque>
#include <mutex>
#include <vector>
// Fixed-capacity pool of system-memory frame slots handed between a renderer
// and a scheduler. Each slot is Free, Rendering, Completed or Scheduled.
// Every public method takes mMutex; the *Locked helpers expect the caller to
// already hold it.
class SystemFrameExchange
{
public:
SystemFrameExchange() = default;
// Constructs and applies `config` via Configure().
explicit SystemFrameExchange(const SystemFrameExchangeConfig& config);
// Rebuilds all slots for `config` and resets the queue and counters.
void Configure(const SystemFrameExchangeConfig& config);
// Copy of the current configuration.
SystemFrameExchangeConfig Config() const;
// Claims a slot for rendering, dropping the oldest completed frame if no slot is free.
bool AcquireForRender(SystemFrame& frame);
// Moves a Rendering slot to Completed and queues it.
bool PublishCompleted(const SystemFrame& frame);
// Takes the oldest Completed frame and marks its slot Scheduled.
bool ConsumeCompletedForSchedule(SystemFrame& frame);
// Frees the Scheduled slot whose buffer starts at `bytes`.
bool ReleaseScheduledByBytes(void* bytes);
// Waits until `targetDepth` slots are Completed or `timeout` elapses.
bool WaitForCompletedDepth(std::size_t targetDepth, std::chrono::milliseconds timeout);
// Returns every slot to Free and empties the completed queue.
void Clear();
// Snapshot of counters and per-state slot counts.
SystemFrameExchangeMetrics Metrics() const;
private:
// One frame buffer plus its bookkeeping.
struct Slot
{
std::vector<unsigned char> bytes;
SystemFrameSlotState state = SystemFrameSlotState::Free;
// Bumped on state transitions back to Free and on acquire; compared against
// SystemFrame::generation to reject stale handles.
uint64_t generation = 1;
uint64_t frameIndex = 0;
};
bool AcquireFreeLocked(SystemFrame& frame);
bool DropOldestCompletedLocked();
bool IsValidLocked(const SystemFrame& frame) const;
void FillFrameLocked(std::size_t index, SystemFrame& frame);
std::size_t CompletedCountLocked() const;
std::size_t FrameByteCount() const;
mutable std::mutex mMutex;
// Notified whenever a slot changes state or the exchange is reconfigured/cleared.
std::condition_variable mCondition;
SystemFrameExchangeConfig mConfig;
std::vector<Slot> mSlots;
// Slot indices in the order they were published (oldest at the front).
std::deque<std::size_t> mCompletedIndices;
// Running counters; the per-state counts are computed in Metrics().
SystemFrameExchangeMetrics mCounters;
};

View File

@@ -0,0 +1,51 @@
#pragma once
#include "VideoIOFormat.h"
#include <cstddef>
#include <cstdint>
// Lifecycle state of one SystemFrameExchange slot.
enum class SystemFrameSlotState
{
// Available to be acquired for rendering.
Free,
// Acquired via AcquireForRender and not yet published.
Rendering,
// Published and waiting in the completed queue.
Completed,
// Consumed for scheduling and not yet released.
Scheduled
};
// Geometry and capacity settings for a SystemFrameExchange.
struct SystemFrameExchangeConfig
{
unsigned width = 0;
unsigned height = 0;
VideoIOPixelFormat pixelFormat = VideoIOPixelFormat::Bgra8;
// Bytes per row; 0 means "derive from pixelFormat and width" when the exchange is configured.
unsigned rowBytes = 0;
// Number of frame slots to allocate.
std::size_t capacity = 0;
};
// Handle describing one slot of a SystemFrameExchange. It does not own `bytes`.
struct SystemFrame
{
// Start of the slot's buffer, or nullptr when the slot has no storage.
void* bytes = nullptr;
long rowBytes = 0;
unsigned width = 0;
unsigned height = 0;
VideoIOPixelFormat pixelFormat = VideoIOPixelFormat::Bgra8;
// Slot index inside the exchange.
std::size_t index = 0;
// Slot generation at the time the handle was filled; used to detect stale handles.
uint64_t generation = 0;
// Caller-assigned frame number; stored in the slot when the frame is published.
uint64_t frameIndex = 0;
};
// Snapshot returned by SystemFrameExchange::Metrics().
struct SystemFrameExchangeMetrics
{
// Point-in-time values computed when the snapshot is taken.
std::size_t capacity = 0;
std::size_t freeCount = 0;
std::size_t renderingCount = 0;
std::size_t completedCount = 0;
std::size_t scheduledCount = 0;
// Length of the completed queue (may include entries that are no longer Completed).
std::size_t completedDepth = 0;
// Running totals since the exchange was last configured.
uint64_t acquiredFrames = 0;
uint64_t completedFrames = 0;
uint64_t scheduledFrames = 0;
// Completed frames discarded to make room for rendering.
uint64_t completedDrops = 0;
// AcquireForRender calls that found no slot.
uint64_t acquireMisses = 0;
// ConsumeCompletedForSchedule calls that found no completed frame.
uint64_t completedPollMisses = 0;
};

View File

@@ -0,0 +1,120 @@
#include "HiddenGlWindow.h"
namespace
{
// ANSI window-class name used for both RegisterClassA and CreateWindowA.
constexpr const char* kWindowClassName = "RenderCadenceCompositorHiddenGlWindow";
}
// Releases the GL context, device context and window via Destroy().
HiddenGlWindow::~HiddenGlWindow()
{
Destroy();
}
bool HiddenGlWindow::Create(unsigned width, unsigned height, std::string& error)
{
Destroy();
mInstance = GetModuleHandle(nullptr);
WNDCLASSA wc = {};
wc.style = CS_OWNDC;
wc.lpfnWndProc = HiddenGlWindow::WindowProc;
wc.hInstance = mInstance;
wc.lpszClassName = kWindowClassName;
mClassAtom = RegisterClassA(&wc);
if (mClassAtom == 0 && GetLastError() != ERROR_CLASS_ALREADY_EXISTS)
{
error = "RegisterClassA failed for hidden OpenGL window.";
return false;
}
mWindow = CreateWindowA(
kWindowClassName,
"Render Cadence Compositor Hidden GL",
WS_OVERLAPPEDWINDOW,
CW_USEDEFAULT,
CW_USEDEFAULT,
static_cast<int>(width),
static_cast<int>(height),
nullptr,
nullptr,
mInstance,
nullptr);
if (!mWindow)
{
error = "CreateWindowA failed for hidden OpenGL window.";
return false;
}
mDc = GetDC(mWindow);
if (!mDc)
{
error = "GetDC failed for hidden OpenGL window.";
return false;
}
PIXELFORMATDESCRIPTOR pfd = {};
pfd.nSize = sizeof(pfd);
pfd.nVersion = 1;
pfd.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER;
pfd.iPixelType = PFD_TYPE_RGBA;
pfd.cColorBits = 32;
pfd.cDepthBits = 0;
pfd.iLayerType = PFD_MAIN_PLANE;
const int pixelFormat = ChoosePixelFormat(mDc, &pfd);
if (pixelFormat == 0 || !SetPixelFormat(mDc, pixelFormat, &pfd))
{
error = "Could not choose/set pixel format for hidden OpenGL window.";
return false;
}
mGlrc = wglCreateContext(mDc);
if (!mGlrc)
{
error = "wglCreateContext failed for hidden OpenGL window.";
return false;
}
return true;
}
// Makes this window's GL context current on the calling thread.
// Returns false when no device context or GL context exists, or when
// wglMakeCurrent does not return TRUE.
bool HiddenGlWindow::MakeCurrent() const
{
    if (mDc == nullptr || mGlrc == nullptr)
        return false;

    return wglMakeCurrent(mDc, mGlrc) == TRUE;
}
// Unbinds whatever GL context is current on the calling thread; the call is
// made unconditionally and does not check that the context is this window's.
void HiddenGlWindow::ClearCurrent() const
{
wglMakeCurrent(nullptr, nullptr);
}
// Tears down, in order: the current-context binding, the GL context, the
// device context and the window, then forgets the instance handle and class
// atom. Safe to call when nothing was created. The window class itself is not
// unregistered.
void HiddenGlWindow::Destroy()
{
    ClearCurrent();

    if (mGlrc != nullptr)
    {
        wglDeleteContext(mGlrc);
        mGlrc = nullptr;
    }

    if (mWindow != nullptr)
    {
        // The DC belongs to the window, so release it before the window goes away.
        if (mDc != nullptr)
        {
            ReleaseDC(mWindow, mDc);
            mDc = nullptr;
        }
        DestroyWindow(mWindow);
        mWindow = nullptr;
    }

    mInstance = nullptr;
    mClassAtom = 0;
}
// Window procedure for the hidden OpenGL window; every message gets default
// handling.
//
// The class is registered with RegisterClassA and the window is created with
// CreateWindowA, which makes this an ANSI window procedure. It therefore calls
// DefWindowProcA explicitly: the generic DefWindowProc macro resolves to
// DefWindowProcW when UNICODE is defined, which would hand ANSI message text
// (for example the window title) to the wide-character default handler.
LRESULT CALLBACK HiddenGlWindow::WindowProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam)
{
    return DefWindowProcA(hwnd, message, wParam, lParam);
}

View File

@@ -0,0 +1,31 @@
#pragma once
#include <windows.h>
#include <string>
// Owns a never-shown Win32 window together with its device context and a WGL
// OpenGL context created on it. Non-copyable; resources are released by
// Destroy() and by the destructor.
class HiddenGlWindow
{
public:
HiddenGlWindow() = default;
HiddenGlWindow(const HiddenGlWindow&) = delete;
HiddenGlWindow& operator=(const HiddenGlWindow&) = delete;
~HiddenGlWindow();
// Creates the window, device context and GL context; fills `error` on failure.
bool Create(unsigned width, unsigned height, std::string& error);
// Binds the GL context to the calling thread; false if nothing was created or binding failed.
bool MakeCurrent() const;
// Unbinds the calling thread's current GL context.
void ClearCurrent() const;
// Releases the GL context, device context and window.
void Destroy();
// Raw handles; null until Create() succeeds and again after Destroy().
HDC DeviceContext() const { return mDc; }
HGLRC Context() const { return mGlrc; }
private:
static LRESULT CALLBACK WindowProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam);
HINSTANCE mInstance = nullptr;
HWND mWindow = nullptr;
HDC mDc = nullptr;
HGLRC mGlrc = nullptr;
// Atom returned by RegisterClassA; 0 when registration was skipped or failed.
ATOM mClassAtom = 0;
};

View File

@@ -0,0 +1,142 @@
#include "Bgra8ReadbackPipeline.h"
#include "../frames/SystemFrameTypes.h"
#include <cstring>
// Releases the PBO ring and render target via Shutdown().
// NOTE(review): Shutdown() issues GL delete calls, so this presumably needs the
// owning GL context to be current - confirm against the caller.
Bgra8ReadbackPipeline::~Bgra8ReadbackPipeline()
{
Shutdown();
}
// Creates the BGRA8 render target and a PBO ring sized for one frame each.
//
// Any previous state is shut down first.
// width, height: render-target size in pixels; both must be non-zero.
// pboDepth: number of PBOs in the readback ring.
// Returns true on success. On every failure path the pipeline is shut down
// again, so Width()/Height()/RowBytes() report 0 and no GL objects remain.
bool Bgra8ReadbackPipeline::Initialize(unsigned width, unsigned height, std::size_t pboDepth)
{
    Shutdown();

    mWidth = width;
    mHeight = height;
    mRowBytes = VideoIORowBytes(VideoIOPixelFormat::Bgra8, width);
    if (mWidth == 0 || mHeight == 0 || mRowBytes == 0)
    {
        // Reset the dimensions stored above so a rejected request does not
        // leave the accessors reporting a size that was never set up.
        Shutdown();
        return false;
    }

    if (!CreateRenderTarget())
    {
        Shutdown();
        return false;
    }

    const std::size_t byteCount = static_cast<std::size_t>(mRowBytes) * static_cast<std::size_t>(mHeight);
    if (!mPboRing.Initialize(pboDepth, byteCount))
    {
        Shutdown();
        return false;
    }

    return true;
}
// Releases the PBO ring, then the framebuffer/texture, and zeroes the stored
// dimensions. Safe to call on an uninitialized pipeline.
void Bgra8ReadbackPipeline::Shutdown()
{
mPboRing.Shutdown();
DestroyRenderTarget();
mWidth = 0;
mHeight = 0;
mRowBytes = 0;
}
// Renders one frame into the pipeline's framebuffer and queues an asynchronous
// readback of it.
//
// frameIndex: passed to the render callback and attached to the readback.
// renderFrame: draws the frame; invoked with the pipeline framebuffer bound.
// Returns false when the pipeline has no framebuffer, the callback is empty, or
// the PBO ring could not queue the readback.
bool Bgra8ReadbackPipeline::RenderAndQueue(uint64_t frameIndex, const RenderCallback& renderFrame)
{
    const bool ready = (mFramebuffer != 0) && static_cast<bool>(renderFrame);
    if (!ready)
        return false;

    // Draw with our framebuffer bound, then restore the default binding.
    glBindFramebuffer(GL_FRAMEBUFFER, mFramebuffer);
    renderFrame(frameIndex);
    glBindFramebuffer(GL_FRAMEBUFFER, 0);

    const bool queued = mPboRing.QueueReadback(mFramebuffer, mWidth, mHeight, frameIndex);
    return queued;
}
// Drains every finished PBO readback into system-memory frames.
//
// For each completed readback the PBO is mapped, a destination frame is
// requested through `acquireFrame`, the pixels are copied and the frame is
// handed to `publishFrame`. The PBO is always unmapped and returned to the ring.
//
// acquireFrame: supplies a destination frame; returning false counts as a miss.
// publishFrame: receives the filled frame; returns whether it was accepted.
// onAcquireMiss: optional; invoked when acquireFrame returns false.
// onCompleted: optional; invoked once per frame that publishFrame accepted.
//
// Does nothing when acquireFrame or publishFrame is empty.
void Bgra8ReadbackPipeline::ConsumeCompleted(
    const AcquireFrameCallback& acquireFrame,
    const PublishFrameCallback& publishFrame,
    const CounterCallback& onAcquireMiss,
    const CounterCallback& onCompleted)
{
    if (!acquireFrame || !publishFrame)
        return;

    PboReadbackRing::CompletedReadback readback;
    while (mPboRing.TryAcquireCompleted(readback))
    {
        glBindBuffer(GL_PIXEL_PACK_BUFFER, readback.pbo);
        void* mapped = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
        if (!mapped)
        {
            // Mapping failed: return the PBO to the ring and skip this readback.
            glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
            mPboRing.ReleaseCompleted(readback);
            continue;
        }

        SystemFrame frame;
        if (acquireFrame(frame))
        {
            const std::size_t byteCount = static_cast<std::size_t>(frame.rowBytes) * static_cast<std::size_t>(frame.height);
            // Copy only when the destination exists and fits inside the PBO.
            if (frame.bytes != nullptr && byteCount <= readback.byteCount)
            {
                std::memcpy(frame.bytes, mapped, byteCount);
                frame.frameIndex = readback.frameIndex;
                frame.pixelFormat = VideoIOPixelFormat::Bgra8;

                // Count the frame as completed only when the publish was
                // accepted; a rejected frame never becomes available downstream.
                const bool published = publishFrame(frame);
                if (published && onCompleted)
                    onCompleted();
            }
            // NOTE(review): when the size check above fails, the acquired frame
            // is neither published nor handed back through any callback -
            // confirm the frame provider can reclaim it.
        }
        else if (onAcquireMiss)
        {
            onAcquireMiss();
        }

        glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
        mPboRing.ReleaseCompleted(readback);
    }
}
// Creates the framebuffer and its RGBA8 colour texture (mWidth x mHeight,
// nearest filtering, clamp-to-edge) and attaches the texture as colour
// attachment 0. Returns whether the framebuffer is complete. The generated
// objects are left in place on failure; the caller releases them.
bool Bgra8ReadbackPipeline::CreateRenderTarget()
{
glGenFramebuffers(1, &mFramebuffer);
glBindFramebuffer(GL_FRAMEBUFFER, mFramebuffer);
glGenTextures(1, &mTexture);
glBindTexture(GL_TEXTURE_2D, mTexture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
// Allocate storage only (null data pointer); contents come from rendering.
glTexImage2D(
GL_TEXTURE_2D,
0,
GL_RGBA8,
static_cast<GLsizei>(mWidth),
static_cast<GLsizei>(mHeight),
0,
GL_BGRA,
GL_UNSIGNED_INT_8_8_8_8_REV,
nullptr);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mTexture, 0);
// Completeness must be queried while the framebuffer is still bound.
const bool complete = glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE;
glBindTexture(GL_TEXTURE_2D, 0);
glBindFramebuffer(GL_FRAMEBUFFER, 0);
return complete;
}
// Deletes the framebuffer and its colour texture if they exist and zeroes both
// handles, so a repeated call does nothing.
void Bgra8ReadbackPipeline::DestroyRenderTarget()
{
    if (mFramebuffer != 0)
    {
        glDeleteFramebuffers(1, &mFramebuffer);
        mFramebuffer = 0;
    }

    if (mTexture != 0)
    {
        glDeleteTextures(1, &mTexture);
        mTexture = 0;
    }
}

View File

@@ -0,0 +1,52 @@
#pragma once
#include "PboReadbackRing.h"
#include "VideoIOFormat.h"
#include <cstddef>
#include <cstdint>
#include <functional>
struct SystemFrame;
// Owns a BGRA8 render target (framebuffer + texture) and a PBO readback ring.
// Frames are rendered through a callback, read back asynchronously, and
// delivered to system-memory frames through acquire/publish callbacks.
// Non-copyable.
class Bgra8ReadbackPipeline
{
public:
// Draws the frame with the given index into the currently bound framebuffer.
using RenderCallback = std::function<void(uint64_t frameIndex)>;
// Supplies a destination frame; returns false when none is available.
using AcquireFrameCallback = std::function<bool(SystemFrame& frame)>;
// Receives a filled frame; returns whether it was accepted.
using PublishFrameCallback = std::function<bool(const SystemFrame& frame)>;
// Parameterless notification used for counting events.
using CounterCallback = std::function<void()>;
Bgra8ReadbackPipeline() = default;
Bgra8ReadbackPipeline(const Bgra8ReadbackPipeline&) = delete;
Bgra8ReadbackPipeline& operator=(const Bgra8ReadbackPipeline&) = delete;
~Bgra8ReadbackPipeline();
// Creates the render target and a ring of `pboDepth` PBOs; false on failure.
bool Initialize(unsigned width, unsigned height, std::size_t pboDepth);
// Releases the PBO ring and render target.
void Shutdown();
// Renders one frame via `renderFrame` and queues its readback.
bool RenderAndQueue(uint64_t frameIndex, const RenderCallback& renderFrame);
// Copies every finished readback into frames obtained from `acquireFrame`
// and passes them to `publishFrame`; the counter callbacks are optional.
void ConsumeCompleted(
const AcquireFrameCallback& acquireFrame,
const PublishFrameCallback& publishFrame,
const CounterCallback& onAcquireMiss = {},
const CounterCallback& onCompleted = {});
GLuint Framebuffer() const { return mFramebuffer; }
unsigned Width() const { return mWidth; }
unsigned Height() const { return mHeight; }
unsigned RowBytes() const { return mRowBytes; }
VideoIOPixelFormat PixelFormat() const { return VideoIOPixelFormat::Bgra8; }
// Number of readbacks the PBO ring refused to queue.
uint64_t PboQueueMisses() const { return mPboRing.QueueMisses(); }
private:
bool CreateRenderTarget();
void DestroyRenderTarget();
unsigned mWidth = 0;
unsigned mHeight = 0;
unsigned mRowBytes = 0;
GLuint mFramebuffer = 0;
GLuint mTexture = 0;
PboReadbackRing mPboRing;
};

View File

@@ -0,0 +1,138 @@
#include "PboReadbackRing.h"
#include <algorithm>
// Releases any fences and buffers still held by the ring by delegating to Shutdown().
PboReadbackRing::~PboReadbackRing()
{
Shutdown();
}
bool PboReadbackRing::Initialize(std::size_t depth, std::size_t byteCount)
{
Shutdown();
if (depth == 0 || byteCount == 0)
return false;
mSlots.resize(depth);
mByteCount = byteCount;
for (Slot& slot : mSlots)
{
glGenBuffers(1, &slot.pbo);
if (slot.pbo == 0)
{
Shutdown();
return false;
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pbo);
glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(mByteCount), nullptr, GL_STREAM_READ);
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
return true;
}
void PboReadbackRing::Shutdown()
{
for (Slot& slot : mSlots)
{
if (slot.fence)
glDeleteSync(slot.fence);
if (slot.pbo != 0)
glDeleteBuffers(1, &slot.pbo);
slot = {};
}
mSlots.clear();
mWriteIndex = 0;
mReadIndex = 0;
mByteCount = 0;
}
// Starts an asynchronous BGRA readback of `framebuffer` into the slot at the
// write cursor and places a fence after it.
// Returns false when the ring is empty, when the write slot is still in flight
// or acquired (counted as a queue miss), or when the fence could not be created.
// The write cursor only advances on success.
bool PboReadbackRing::QueueReadback(GLuint framebuffer, unsigned width, unsigned height, uint64_t frameIndex)
{
    if (mSlots.empty())
        return false;

    Slot& target = mSlots[mWriteIndex];
    const bool slotBusy = target.inFlight || target.acquired;
    if (slotBusy)
    {
        ++mQueueMisses;
        return false;
    }

    const GLsizei readWidth = static_cast<GLsizei>(width);
    const GLsizei readHeight = static_cast<GLsizei>(height);

    glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
    glPixelStorei(GL_PACK_ALIGNMENT, 4);
    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
    glBindBuffer(GL_PIXEL_PACK_BUFFER, target.pbo);
    // Re-specify the buffer store before reading into it.
    glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(mByteCount), nullptr, GL_STREAM_READ);
    // With a pack buffer bound the final argument is an offset into it, not a pointer.
    glReadPixels(0, 0, readWidth, readHeight, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr);

    target.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
    target.frameIndex = frameIndex;
    target.inFlight = (target.fence != nullptr);
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

    if (target.fence == nullptr)
        return false;

    mWriteIndex = (mWriteIndex + 1) % mSlots.size();
    return true;
}
// Hands out the next finished readback, if there is one.
// Starting at the read cursor, slots that are idle or already acquired are
// skipped (advancing the cursor). The first in-flight slot found is polled
// without blocking: if its fence is not yet signalled the function returns
// false immediately and does not look at later slots.
// On success the slot is marked acquired and `readback` is filled with its
// buffer name, frame index and byte count; the slot stays reserved until
// ReleaseCompleted() is called for it.
bool PboReadbackRing::TryAcquireCompleted(CompletedReadback& readback)
{
if (mSlots.empty())
return false;
// Bounded by the ring size so a ring with nothing in flight terminates.
for (std::size_t checked = 0; checked < mSlots.size(); ++checked)
{
Slot& slot = mSlots[mReadIndex];
if (!slot.inFlight || slot.acquired || slot.fence == nullptr)
{
mReadIndex = (mReadIndex + 1) % mSlots.size();
continue;
}
// Zero timeout: a pure poll. Any result other than the two "signalled"
// values (including GL_TIMEOUT_EXPIRED and GL_WAIT_FAILED) is treated as
// "not ready".
// NOTE(review): flags is 0, so GL_SYNC_FLUSH_COMMANDS_BIT is not set;
// presumably the queuing side flushes the command stream - confirm.
const GLenum waitResult = glClientWaitSync(slot.fence, 0, 0);
if (waitResult != GL_ALREADY_SIGNALED && waitResult != GL_CONDITION_SATISFIED)
return false;
slot.acquired = true;
readback.pbo = slot.pbo;
readback.frameIndex = slot.frameIndex;
readback.byteCount = mByteCount;
return true;
}
return false;
}
void PboReadbackRing::ReleaseCompleted(const CompletedReadback& readback)
{
for (std::size_t index = 0; index < mSlots.size(); ++index)
{
Slot& slot = mSlots[index];
if (!slot.acquired || slot.pbo != readback.pbo)
continue;
ResetSlot(slot);
mReadIndex = (index + 1) % mSlots.size();
return;
}
}
void PboReadbackRing::DrainCompleted()
{
for (std::size_t pass = 0; pass < mSlots.size() * 2; ++pass)
{
CompletedReadback readback;
if (!TryAcquireCompleted(readback))
break;
ReleaseCompleted(readback);
}
}
// Puts a slot back into the idle state: deletes its fence (if any) and clears
// the bookkeeping fields. The slot's buffer object is kept for reuse.
void PboReadbackRing::ResetSlot(Slot& slot)
{
    if (slot.fence != nullptr)
    {
        glDeleteSync(slot.fence);
        slot.fence = nullptr;
    }

    slot.frameIndex = 0;
    slot.acquired = false;
    slot.inFlight = false;
}

View File

@@ -0,0 +1,52 @@
#pragma once
#include "GLExtensions.h"
#include <cstddef>
#include <cstdint>
#include <vector>
// Fixed-depth ring of pixel-pack buffer objects used for asynchronous
// framebuffer readback. Each slot cycles through: idle -> in flight (readback
// queued, fence pending) -> acquired (handed to the caller) -> idle.
// Non-copyable; the destructor releases all GL objects via Shutdown().
class PboReadbackRing
{
public:
// Describes one finished readback handed out by TryAcquireCompleted().
struct CompletedReadback
{
GLuint pbo = 0; // buffer holding the pixels; also identifies the slot on release
uint64_t frameIndex = 0; // frame index passed to QueueReadback()
std::size_t byteCount = 0; // size of the buffer store in bytes
};
PboReadbackRing() = default;
PboReadbackRing(const PboReadbackRing&) = delete;
PboReadbackRing& operator=(const PboReadbackRing&) = delete;
~PboReadbackRing();
// Creates `depth` buffers of `byteCount` bytes; false on zero arguments or
// buffer creation failure.
bool Initialize(std::size_t depth, std::size_t byteCount);
// Deletes all fences and buffers and empties the ring.
void Shutdown();
// Queues a readback into the slot at the write cursor; false if the ring is
// empty, the slot is busy (counted as a queue miss) or no fence was created.
bool QueueReadback(GLuint framebuffer, unsigned width, unsigned height, uint64_t frameIndex);
// Non-blocking: true and fills `readback` when the next in-flight slot's
// fence has signalled.
bool TryAcquireCompleted(CompletedReadback& readback);
// Returns an acquired slot (matched by buffer name) to the idle state.
void ReleaseCompleted(const CompletedReadback& readback);
// Acquires and releases finished readbacks without using their contents.
void DrainCompleted();
std::size_t Depth() const { return mSlots.size(); }
// Number of QueueReadback() calls rejected because the write slot was busy.
// Not reset by Shutdown().
uint64_t QueueMisses() const { return mQueueMisses; }
private:
struct Slot
{
GLuint pbo = 0; // pixel-pack buffer name, 0 when not created
GLsync fence = nullptr; // fence placed after the queued readback
bool inFlight = false; // readback queued, not yet released
bool acquired = false; // handed out by TryAcquireCompleted()
uint64_t frameIndex = 0;
};
void ResetSlot(Slot& slot);
std::vector<Slot> mSlots;
std::size_t mWriteIndex = 0; // slot the next QueueReadback() targets
std::size_t mReadIndex = 0; // slot TryAcquireCompleted() examines first
std::size_t mByteCount = 0; // size of every buffer store
uint64_t mQueueMisses = 0;
};

View File

@@ -0,0 +1,45 @@
#include "RenderCadenceClock.h"
#include <algorithm>
// Builds a cadence clock with the given frame duration in milliseconds and
// arms it so the first frame is due immediately.
//
// Inputs that cannot be used as a frame duration fall back to 16 ms: zero,
// negative, NaN, infinite, values that round to zero clock ticks, and values
// too large for the clock's duration type.
RenderCadenceClock::RenderCadenceClock(double frameDurationMilliseconds)
{
    using DoubleMilliseconds = std::chrono::duration<double, std::milli>;
    const Duration fallbackFrameDuration = std::chrono::milliseconds(16);

    // Converting a floating-point value that is NaN or outside the integer tick
    // range is undefined behaviour, so the input is validated before the cast.
    // The limit is an eighth of the representable range so that the
    // `mFrameDuration * 4` and time-point additions in MarkRendered() cannot
    // overflow either.
    const double maxMilliseconds = std::chrono::duration_cast<DoubleMilliseconds>(Duration::max()).count() / 8.0;
    // Both comparisons are false for NaN, which therefore takes the fallback.
    const bool convertible = frameDurationMilliseconds > 0.0 && frameDurationMilliseconds < maxMilliseconds;

    mFrameDuration = convertible
        ? std::chrono::duration_cast<Duration>(DoubleMilliseconds(frameDurationMilliseconds))
        : fallbackFrameDuration;

    // A tiny positive input can still truncate to zero ticks.
    if (mFrameDuration <= Duration::zero())
        mFrameDuration = fallbackFrameDuration;

    Reset();
}
// Re-arms the clock: the next frame becomes due at `now` and both the overrun
// and skipped-frame counters are cleared. The frame duration is unchanged.
void RenderCadenceClock::Reset(TimePoint now)
{
mNextRenderTime = now;
mOverrunCount = 0;
mSkippedFrameCount = 0;
}
// Reports whether a frame is due at `now`.
//  - Not due: `sleepFor` holds the time left until the deadline, capped at 1 ms.
//  - Due: `due` is true. If the deadline was missed by more than one frame
//    duration, `skippedFrames` is the number of whole frame durations missed,
//    one overrun is recorded and the skipped total grows accordingly.
// Poll never moves the deadline; MarkRendered() does that.
RenderCadenceClock::Tick RenderCadenceClock::Poll(TimePoint now)
{
    Tick result;

    if (now >= mNextRenderTime)
    {
        result.due = true;
        const Duration lateness = now - mNextRenderTime;
        if (lateness > mFrameDuration)
        {
            const uint64_t missed = static_cast<uint64_t>(lateness / mFrameDuration);
            result.skippedFrames = missed;
            ++mOverrunCount;
            mSkippedFrameCount += missed;
        }
        return result;
    }

    const Duration untilDue = mNextRenderTime - now;
    const Duration sleepCap = std::chrono::milliseconds(1);
    result.sleepFor = untilDue < sleepCap ? untilDue : sleepCap;
    return result;
}
// Advances the deadline by one frame duration after a frame was rendered.
// If the new deadline is still more than four frame durations in the past, the
// schedule is abandoned and restarted one frame duration after `now` instead of
// trying to catch up.
void RenderCadenceClock::MarkRendered(TimePoint now)
{
    mNextRenderTime += mFrameDuration;

    const Duration behindBy = now - mNextRenderTime;
    const Duration catchUpLimit = mFrameDuration * 4;
    if (behindBy > catchUpLimit)
        mNextRenderTime = now + mFrameDuration;
}

View File

@@ -0,0 +1,36 @@
#pragma once
#include <chrono>
#include <cstdint>
// Fixed-interval frame pacing helper built on std::chrono::steady_clock.
// Callers poll it, render when a tick is due, then call MarkRendered().
class RenderCadenceClock
{
public:
using Clock = std::chrono::steady_clock;
using Duration = Clock::duration;
using TimePoint = Clock::time_point;
// Result of Poll().
struct Tick
{
bool due = false; // true when a frame should be rendered now
uint64_t skippedFrames = 0; // whole frame durations the deadline was missed by
Duration sleepFor = Duration::zero(); // suggested wait when not due
};
// Frame duration is given in milliseconds; the default corresponds to 60 fps.
explicit RenderCadenceClock(double frameDurationMilliseconds = 1000.0 / 60.0);
// Makes the next frame due at `now` and clears the counters.
void Reset(TimePoint now = Clock::now());
// Checks whether a frame is due; does not move the deadline.
Tick Poll(TimePoint now = Clock::now());
// Moves the deadline forward after a frame was rendered.
void MarkRendered(TimePoint now = Clock::now());
Duration FrameDuration() const { return mFrameDuration; }
TimePoint NextRenderTime() const { return mNextRenderTime; }
// Number of Poll() calls that found the deadline missed by more than one frame.
uint64_t OverrunCount() const { return mOverrunCount; }
// Sum of Tick::skippedFrames over all polls since the last Reset().
uint64_t SkippedFrameCount() const { return mSkippedFrameCount; }
private:
Duration mFrameDuration;
TimePoint mNextRenderTime = Clock::now();
uint64_t mOverrunCount = 0;
uint64_t mSkippedFrameCount = 0;
};

View File

@@ -0,0 +1,181 @@
#include "RenderThread.h"
#include "../frames/SystemFrameExchange.h"
#include "../frames/SystemFrameTypes.h"
#include "../platform/HiddenGlWindow.h"
#include "Bgra8ReadbackPipeline.h"
#include "GLExtensions.h"
#include "SimpleMotionRenderer.h"
#include <algorithm>
#include <thread>
// Stores a reference to the frame exchange and a copy of the configuration.
// No thread is started here; see Start().
RenderThread::RenderThread(SystemFrameExchange& frameExchange, Config config) :
mFrameExchange(frameExchange),
mConfig(config)
{
}
// Requests the render thread to stop and joins it (via Stop()).
RenderThread::~RenderThread()
{
Stop();
}
// Launches the render thread and waits up to three seconds for it to report
// either successful initialization or a startup error.
//
// Returns true if the thread is running (including when it was already
// running). Returns false and fills `error` when initialization failed or did
// not finish in time.
//
// A worker that misses the startup deadline is asked to stop, so it cannot
// start rendering and publishing frames after this function has reported
// failure. It is deliberately not joined here, to keep the timeout bounded;
// Stop(), the destructor, or the next Start() call joins it.
bool RenderThread::Start(std::string& error)
{
    if (mThread.joinable())
    {
        // Joinable with no stop request pending: a live render thread.
        if (!mStopping.load(std::memory_order_acquire))
            return true;

        // Joinable with a stop request pending: the leftover of a start that
        // timed out. Wait for it to exit before launching a fresh worker.
        mThread.join();
    }

    {
        std::lock_guard<std::mutex> lock(mStartupMutex);
        mStarted = false;
        mStartupError.clear();
    }

    mStopping.store(false, std::memory_order_release);
    mThread = std::thread([this]() { ThreadMain(); });

    std::unique_lock<std::mutex> lock(mStartupMutex);
    const bool reported = mStartupCondition.wait_for(lock, std::chrono::seconds(3), [this]() {
        return mStarted || !mStartupError.empty();
    });
    if (!reported)
    {
        // Make the late worker leave its render loop as soon as it gets there.
        mStopping.store(true, std::memory_order_release);
        error = "Timed out starting render thread.";
        return false;
    }

    if (!mStartupError.empty())
    {
        error = mStartupError;
        // The worker returns right after signalling failure; release the lock
        // before joining so it is never blocked on mStartupMutex.
        lock.unlock();
        if (mThread.joinable())
            mThread.join();
        return false;
    }

    return true;
}
// Sets the stop flag polled by the render loop and waits for the thread to
// exit. Safe to call when no thread was started.
void RenderThread::Stop()
{
mStopping.store(true, std::memory_order_release);
if (mThread.joinable())
mThread.join();
}
// Returns a copy of the counters, taken under the metrics mutex.
RenderThread::Metrics RenderThread::GetMetrics() const
{
std::lock_guard<std::mutex> lock(mMetricsMutex);
return mMetrics;
}
// Body of the render thread. Creates the hidden GL window/context and the
// render + readback pipeline, reports startup success or failure to Start(),
// then runs the cadence loop until a stop is requested and finally tears
// everything down on this same thread.
void RenderThread::ThreadMain()
{
// --- Startup: any failure is reported through SignalStartupFailure and ends the thread.
HiddenGlWindow window;
std::string error;
if (!window.Create(mConfig.width, mConfig.height, error) || !window.MakeCurrent())
{
SignalStartupFailure(error.empty() ? "OpenGL context creation failed." : error);
return;
}
if (!ResolveGLExtensions())
{
SignalStartupFailure("OpenGL extension resolution failed.");
return;
}
SimpleMotionRenderer renderer;
Bgra8ReadbackPipeline readback;
if (!renderer.InitializeGl(mConfig.width, mConfig.height) || !readback.Initialize(mConfig.width, mConfig.height, mConfig.pboDepth))
{
SignalStartupFailure("Render pipeline initialization failed.");
return;
}
RenderCadenceClock clock(mConfig.frameDurationMilliseconds);
uint64_t frameIndex = 0;
mRunning.store(true, std::memory_order_release);
SignalStarted();
// --- Cadence loop.
while (!mStopping.load(std::memory_order_acquire))
{
// Every iteration first hands finished readbacks to the frame exchange,
// whether or not a new frame is due.
readback.ConsumeCompleted(
[this](SystemFrame& frame) { return mFrameExchange.AcquireForRender(frame); },
[this](const SystemFrame& frame) { return mFrameExchange.PublishCompleted(frame); },
[this]() {
CountAcquireMiss();
},
[this]() { CountCompleted(); });
const auto now = RenderCadenceClock::Clock::now();
const RenderCadenceClock::Tick tick = clock.Poll(now);
if (!tick.due)
{
// Not due yet: sleep for the interval suggested by the clock and poll again.
if (tick.sleepFor > RenderCadenceClock::Duration::zero())
std::this_thread::sleep_for(tick.sleepFor);
continue;
}
// Any RenderAndQueue failure is recorded as a PBO queue miss.
if (!readback.RenderAndQueue(frameIndex, [&renderer](uint64_t index) { renderer.RenderFrame(index); }))
{
std::lock_guard<std::mutex> lock(mMetricsMutex);
++mMetrics.pboQueueMisses;
}
// The frame counts as rendered and the index advances even when queuing failed.
CountRendered();
++frameIndex;
clock.MarkRendered(RenderCadenceClock::Clock::now());
{
// Mirror the clock's counters into the published metrics.
std::lock_guard<std::mutex> lock(mMetricsMutex);
mMetrics.clockOverruns = clock.OverrunCount();
mMetrics.skippedFrames = clock.SkippedFrameCount();
}
}
// --- Shutdown: a bounded number of extra passes over readbacks that may
// still be pending, then release resources.
for (std::size_t i = 0; i < mConfig.pboDepth * 2; ++i)
{
readback.ConsumeCompleted(
[this](SystemFrame& frame) { return mFrameExchange.AcquireForRender(frame); },
[this](const SystemFrame& frame) { return mFrameExchange.PublishCompleted(frame); },
[this]() {
CountAcquireMiss();
},
[this]() { CountCompleted(); });
}
readback.Shutdown();
renderer.ShutdownGl();
window.ClearCurrent();
mRunning.store(false, std::memory_order_release);
}
// Marks startup as successful and wakes the thread waiting in Start().
void RenderThread::SignalStarted()
{
std::lock_guard<std::mutex> lock(mStartupMutex);
mStarted = true;
mStartupCondition.notify_all();
}
// Records the startup error and wakes the thread waiting in Start().
// Start() detects failure by the message being non-empty.
void RenderThread::SignalStartupFailure(const std::string& error)
{
std::lock_guard<std::mutex> lock(mStartupMutex);
mStartupError = error;
mStartupCondition.notify_all();
}
// Increments the rendered-frame counter under the metrics mutex.
void RenderThread::CountRendered()
{
std::lock_guard<std::mutex> lock(mMetricsMutex);
++mMetrics.renderedFrames;
}
// Increments the completed-readback counter under the metrics mutex.
void RenderThread::CountCompleted()
{
std::lock_guard<std::mutex> lock(mMetricsMutex);
++mMetrics.completedReadbacks;
}
// Increments the acquire-miss counter under the metrics mutex.
void RenderThread::CountAcquireMiss()
{
std::lock_guard<std::mutex> lock(mMetricsMutex);
++mMetrics.acquireMisses;
}

View File

@@ -0,0 +1,68 @@
#pragma once
#include "RenderCadenceClock.h"
#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <mutex>
#include <string>
#include <thread>
class SystemFrameExchange;
// Runs rendering and readback on a dedicated thread paced by a
// RenderCadenceClock, publishing finished frames through a SystemFrameExchange.
// Non-copyable; the destructor stops and joins the thread.
class RenderThread
{
public:
struct Config
{
unsigned width = 1920;
unsigned height = 1080;
double frameDurationMilliseconds = 1000.0 / 59.94; // default cadence: 59.94 fps
std::size_t pboDepth = 6; // number of readback buffers in the ring
};
// Counters maintained by the render thread; read them with GetMetrics().
struct Metrics
{
uint64_t renderedFrames = 0;
uint64_t completedReadbacks = 0;
uint64_t acquireMisses = 0;
uint64_t pboQueueMisses = 0;
uint64_t clockOverruns = 0;
uint64_t skippedFrames = 0;
};
RenderThread(SystemFrameExchange& frameExchange, Config config);
RenderThread(const RenderThread&) = delete;
RenderThread& operator=(const RenderThread&) = delete;
~RenderThread();
// Starts the thread and waits for its startup result; `error` is filled on failure.
bool Start(std::string& error);
// Requests the thread to exit and joins it.
void Stop();
// Returns a snapshot copy of the counters.
Metrics GetMetrics() const;
// True between successful startup and the end of the thread's shutdown.
bool IsRunning() const { return mRunning.load(std::memory_order_acquire); }
private:
void ThreadMain();
void SignalStarted();
void SignalStartupFailure(const std::string& error);
void CountRendered();
void CountCompleted();
void CountAcquireMiss();
SystemFrameExchange& mFrameExchange;
Config mConfig;
std::thread mThread;
std::atomic<bool> mStopping{ false };
std::atomic<bool> mRunning{ false };
// Startup handshake between Start() and ThreadMain(): mStarted and
// mStartupError are guarded by mStartupMutex.
mutable std::mutex mStartupMutex;
std::condition_variable mStartupCondition;
bool mStarted = false;
std::string mStartupError;
// mMetrics is guarded by mMetricsMutex.
mutable std::mutex mMetricsMutex;
Metrics mMetrics;
};

View File

@@ -0,0 +1,50 @@
#include "SimpleMotionRenderer.h"
#include "GLExtensions.h"
#include <algorithm>
#include <cmath>
// Stores the output size used by RenderFrame(). Returns false if either
// dimension is zero. No GL objects are created here.
bool SimpleMotionRenderer::InitializeGl(unsigned width, unsigned height)
{
mWidth = width;
mHeight = height;
return mWidth > 0 && mHeight > 0;
}
// Draws a simple animated test pattern into the currently bound framebuffer,
// using only clears and the scissor test:
//   1. a slowly colour-cycling full-frame background,
//   2. a box that wanders around the frame,
//   3. a full-height white stripe sweeping left to right every 120 frames.
// The animation is a pure function of `frameIndex`. Leaves the scissor test
// disabled on return. Does nothing when the renderer has no valid size.
void SimpleMotionRenderer::RenderFrame(uint64_t frameIndex)
{
    // Before InitializeGl() or after ShutdownGl() both dimensions are zero. The
    // unsigned subtractions below would then wrap to ~4 billion and the
    // float-to-int conversions would be out of range (undefined behaviour).
    if (mWidth == 0 || mHeight == 0)
        return;

    // Animation time, treating the frame index as 60 steps per second.
    const float t = static_cast<float>(frameIndex) / 60.0f;
    const float red = 0.1f + 0.4f * (0.5f + 0.5f * std::sin(t));
    const float green = 0.1f + 0.4f * (0.5f + 0.5f * std::sin(t * 0.73f + 1.0f));
    const float blue = 0.15f + 0.3f * (0.5f + 0.5f * std::sin(t * 0.41f + 2.0f));

    // Background.
    glViewport(0, 0, static_cast<GLsizei>(mWidth), static_cast<GLsizei>(mHeight));
    glDisable(GL_SCISSOR_TEST);
    glClearColor(red, green, blue, 1.0f);
    glClear(GL_COLOR_BUFFER_BIT);

    // Moving box: its origin ranges over [0, size - box] on each axis so it
    // always stays inside the frame. (std::max) is parenthesized as in the
    // rest of this file.
    const int boxWidth = (std::max)(1, static_cast<int>(mWidth / 6));
    const int boxHeight = (std::max)(1, static_cast<int>(mHeight / 5));
    const float phase = 0.5f + 0.5f * std::sin(t * 1.7f);
    const int x = static_cast<int>(phase * static_cast<float>(mWidth - static_cast<unsigned>(boxWidth)));
    const int y = static_cast<int>((0.5f + 0.5f * std::sin(t * 1.1f + 0.8f)) * static_cast<float>(mHeight - static_cast<unsigned>(boxHeight)));
    glEnable(GL_SCISSOR_TEST);
    glScissor(x, y, boxWidth, boxHeight);
    glClearColor(1.0f - red, 0.85f, 0.15f + blue, 1.0f);
    glClear(GL_COLOR_BUFFER_BIT);

    // Sweeping stripe: position is linear in (frameIndex % 120), reaching the
    // right edge at step 119.
    const int stripeWidth = (std::max)(1, static_cast<int>(mWidth / 80));
    const int stripeX = static_cast<int>((frameIndex % 120) * (mWidth - static_cast<unsigned>(stripeWidth)) / 119);
    glScissor(stripeX, 0, stripeWidth, static_cast<GLsizei>(mHeight));
    glClearColor(1.0f, 1.0f, 1.0f, 1.0f);
    glClear(GL_COLOR_BUFFER_BIT);

    glDisable(GL_SCISSOR_TEST);
}
// Clears the stored output size. There are no GL objects to release.
void SimpleMotionRenderer::ShutdownGl()
{
mWidth = 0;
mHeight = 0;
}

View File

@@ -0,0 +1,20 @@
#pragma once
#include <cstdint>
// Minimal animated test-pattern renderer; its only state is the output size.
class SimpleMotionRenderer
{
public:
SimpleMotionRenderer() = default;
// Records the output size; false if either dimension is zero.
bool InitializeGl(unsigned width, unsigned height);
// Draws the pattern for the given frame index.
void RenderFrame(uint64_t frameIndex);
// Resets the stored size to zero.
void ShutdownGl();
unsigned Width() const { return mWidth; }
unsigned Height() const { return mHeight; }
private:
unsigned mWidth = 0;
unsigned mHeight = 0;
};

View File

@@ -0,0 +1,89 @@
#pragma once
#include "../video/DeckLinkOutput.h"
#include "../video/DeckLinkOutputThread.h"
#include <chrono>
#include <cstddef>
#include <cstdint>
namespace RenderCadenceCompositor
{
// One telemetry sample as produced by CadenceTelemetry::Sample().
struct CadenceTelemetrySnapshot
{
double sampleSeconds = 0.0; // time since the previous sample; 0 for the first one
double renderFps = 0.0; // change in renderedTotal divided by sampleSeconds
double scheduleFps = 0.0; // change in scheduledTotal divided by sampleSeconds
// Current slot counts reported by the frame exchange.
std::size_t freeFrames = 0;
std::size_t completedFrames = 0;
std::size_t scheduledFrames = 0;
// Running totals reported by the frame exchange.
uint64_t renderedTotal = 0;
uint64_t scheduledTotal = 0;
uint64_t completedPollMisses = 0; // from the output thread
uint64_t scheduleFailures = 0; // larger of the output's and the output thread's count
// Completion statistics reported by the DeckLink output.
uint64_t completions = 0;
uint64_t displayedLate = 0;
uint64_t dropped = 0;
// deckLinkBuffered is only meaningful when deckLinkBufferedAvailable is true.
bool deckLinkBufferedAvailable = false;
uint64_t deckLinkBuffered = 0;
double deckLinkScheduleCallMilliseconds = 0.0;
};
// Turns the raw counters of the frame exchange, the DeckLink output and the
// output thread into a CadenceTelemetrySnapshot, deriving per-second rates from
// the difference to the previous sample.
class CadenceTelemetry
{
public:
    // Collects one snapshot. Rates stay 0.0 on the first call and whenever no
    // time has elapsed since the previous call.
    template <typename SystemFrameExchange, typename OutputThread>
    CadenceTelemetrySnapshot Sample(
        const SystemFrameExchange& exchange,
        const DeckLinkOutput& output,
        const OutputThread& outputThread)
    {
        const Clock::time_point sampledAt = Clock::now();
        double elapsedSeconds = 0.0;
        if (mHasLastSample)
            elapsedSeconds = std::chrono::duration_cast<std::chrono::duration<double>>(sampledAt - mLastSampleTime).count();

        const auto exchangeMetrics = exchange.Metrics();
        const DeckLinkOutputMetrics outputMetrics = output.Metrics();
        const auto threadMetrics = outputThread.Metrics();

        CadenceTelemetrySnapshot result;
        result.sampleSeconds = elapsedSeconds;

        // Frame exchange: running totals and current slot occupancy.
        result.renderedTotal = exchangeMetrics.completedFrames;
        result.scheduledTotal = exchangeMetrics.scheduledFrames;
        result.freeFrames = exchangeMetrics.freeCount;
        result.completedFrames = exchangeMetrics.completedCount;
        result.scheduledFrames = exchangeMetrics.scheduledCount;

        // Output thread and DeckLink output.
        result.completedPollMisses = threadMetrics.completedPollMisses;
        // Both sides count schedule failures; report whichever saw more.
        if (outputMetrics.scheduleFailures > threadMetrics.scheduleFailures)
            result.scheduleFailures = outputMetrics.scheduleFailures;
        else
            result.scheduleFailures = threadMetrics.scheduleFailures;
        result.completions = outputMetrics.completions;
        result.displayedLate = outputMetrics.displayedLate;
        result.dropped = outputMetrics.dropped;
        result.deckLinkBufferedAvailable = outputMetrics.actualBufferedFramesAvailable;
        result.deckLinkBuffered = outputMetrics.actualBufferedFrames;
        result.deckLinkScheduleCallMilliseconds = outputMetrics.scheduleCallMilliseconds;

        const bool canComputeRates = mHasLastSample && elapsedSeconds > 0.0;
        if (canComputeRates)
        {
            const uint64_t renderedDelta = result.renderedTotal - mLastRenderedFrames;
            const uint64_t scheduledDelta = result.scheduledTotal - mLastScheduledFrames;
            result.renderFps = static_cast<double>(renderedDelta) / elapsedSeconds;
            result.scheduleFps = static_cast<double>(scheduledDelta) / elapsedSeconds;
        }

        // Remember this sample as the baseline for the next one.
        mLastSampleTime = sampledAt;
        mLastRenderedFrames = result.renderedTotal;
        mLastScheduledFrames = result.scheduledTotal;
        mHasLastSample = true;
        return result;
    }

private:
    using Clock = std::chrono::steady_clock;

    Clock::time_point mLastSampleTime = Clock::now();
    uint64_t mLastRenderedFrames = 0;
    uint64_t mLastScheduledFrames = 0;
    bool mHasLastSample = false;
};
}

View File

@@ -0,0 +1,86 @@
#pragma once
#include "CadenceTelemetry.h"
#include <atomic>
#include <chrono>
#include <iomanip>
#include <iostream>
#include <thread>
namespace RenderCadenceCompositor
{
// Settings for TelemetryPrinter.
struct TelemetryPrinterConfig
{
// Time the printer thread waits between two printed samples.
std::chrono::milliseconds interval = std::chrono::seconds(1);
};
// Background thread that samples CadenceTelemetry once per configured interval
// and prints each sample as one line on std::cout.
// Non-copyable; the destructor stops and joins the thread.
class TelemetryPrinter
{
public:
    explicit TelemetryPrinter(TelemetryPrinterConfig config = TelemetryPrinterConfig()) :
        mConfig(config)
    {
    }

    TelemetryPrinter(const TelemetryPrinter&) = delete;
    TelemetryPrinter& operator=(const TelemetryPrinter&) = delete;

    ~TelemetryPrinter()
    {
        Stop();
    }

    // Starts the printer thread; does nothing if it is already running.
    // The three objects are captured by reference, so they must stay alive
    // until Stop() has returned.
    template <typename SystemFrameExchange, typename OutputThread>
    void Start(const SystemFrameExchange& exchange, const DeckLinkOutput& output, const OutputThread& outputThread)
    {
        if (mRunning)
            return;
        mStopping = false;
        mThread = std::thread([this, &exchange, &output, &outputThread]() {
            CadenceTelemetry telemetry;
            // WaitForNextSample() returns false once a stop was requested, so
            // no sample is taken or printed after Stop() has been called.
            while (WaitForNextSample())
                Print(telemetry.Sample(exchange, output, outputThread));
        });
        mRunning = true;
    }

    // Requests the thread to exit and joins it. Returns within roughly one
    // poll slice rather than after a full reporting interval.
    void Stop()
    {
        mStopping = true;
        if (mThread.joinable())
            mThread.join();
        mRunning = false;
    }

private:
    // Waits one reporting interval in short slices so a stop request is noticed
    // promptly. Returns true when the interval elapsed and false when a stop
    // was requested first.
    bool WaitForNextSample() const
    {
        using WaitClock = std::chrono::steady_clock;
        const WaitClock::duration pollSlice = std::chrono::milliseconds(20);
        const WaitClock::time_point deadline = WaitClock::now() + mConfig.interval;
        while (!mStopping)
        {
            const WaitClock::time_point now = WaitClock::now();
            if (now >= deadline)
                return true;
            const WaitClock::duration remaining = deadline - now;
            std::this_thread::sleep_for(remaining < pollSlice ? remaining : pollSlice);
        }
        return false;
    }

    // Writes one snapshot as a single space-separated key=value line.
    static void Print(const CadenceTelemetrySnapshot& snapshot)
    {
        std::cout << std::fixed << std::setprecision(1)
            << "renderFps=" << snapshot.renderFps
            << " scheduleFps=" << snapshot.scheduleFps
            << " free=" << snapshot.freeFrames
            << " completed=" << snapshot.completedFrames
            << " scheduled=" << snapshot.scheduledFrames
            << " completedPollMisses=" << snapshot.completedPollMisses
            << " scheduleFailures=" << snapshot.scheduleFailures
            << " completions=" << snapshot.completions
            << " late=" << snapshot.displayedLate
            << " dropped=" << snapshot.dropped
            << " decklinkBuffered=";
        // The buffered-frame count is only printed when the output reported it.
        if (snapshot.deckLinkBufferedAvailable)
            std::cout << snapshot.deckLinkBuffered;
        else
            std::cout << "n/a";
        std::cout << " scheduleCallMs=" << snapshot.deckLinkScheduleCallMilliseconds << "\n";
    }

    TelemetryPrinterConfig mConfig;
    std::thread mThread;
    std::atomic<bool> mStopping{ false };
    std::atomic<bool> mRunning{ false };
};
}

View File

@@ -0,0 +1,105 @@
#include "DeckLinkOutput.h"
#include "VideoIOFormat.h"
namespace RenderCadenceCompositor
{
// Releases the session's resources on destruction (via ReleaseResources()).
DeckLinkOutput::~DeckLinkOutput()
{
ReleaseResources();
}
bool DeckLinkOutput::Initialize(const DeckLinkOutputConfig& config, CompletionCallback completionCallback, std::string& error)
{
mConfig = config;
mCompletionCallback = completionCallback;
VideoFormatSelection formats;
if (!mSession.DiscoverDevicesAndModes(formats, error))
return false;
if (!mSession.SelectPreferredFormats(formats, config.outputAlphaRequired, error))
return false;
if (!mSession.ConfigureOutput(
[this](const VideoIOCompletion& completion) { HandleCompletion(completion); },
formats.output,
config.externalKeyingEnabled,
error))
{
return false;
}
if (!mSession.PrepareOutputSchedule())
{
error = "DeckLink output schedule preparation failed.";
return false;
}
return true;
}
// Starts scheduled playback on the session. On failure `error` is set and
// false is returned; on success `error` is left untouched.
bool DeckLinkOutput::StartScheduledPlayback(std::string& error)
{
    const bool started = mSession.StartScheduledPlayback();
    if (!started)
    {
        error = "DeckLink scheduled playback failed to start.";
        return false;
    }
    return true;
}
// Forwards the frame to the session for scheduling and returns its result.
bool DeckLinkOutput::ScheduleFrame(const VideoIOOutputFrame& frame)
{
return mSession.ScheduleOutputFrame(frame);
}
// Stops the underlying session.
void DeckLinkOutput::Stop()
{
mSession.Stop();
}
// Releases the resources held by the underlying session. Also called by the destructor.
void DeckLinkOutput::ReleaseResources()
{
mSession.ReleaseResources();
}
// Exposes the session's state object by reference (no copy is made).
const VideoIOState& DeckLinkOutput::State() const
{
return mSession.State();
}
// Builds a metrics snapshot from two sources: the atomic completion counters
// maintained by HandleCompletion, and scheduling fields read from the
// session state.
// NOTE(review): the session state is read here without any locking visible in
// this file - confirm that is safe when called from a telemetry thread.
DeckLinkOutputMetrics DeckLinkOutput::Metrics() const
{
DeckLinkOutputMetrics metrics;
metrics.completions = mCompletions.load();
metrics.displayedLate = mDisplayedLate.load();
metrics.dropped = mDropped.load();
metrics.flushed = mFlushed.load();
const VideoIOState& state = mSession.State();
metrics.scheduleFailures = state.deckLinkScheduleFailureCount;
metrics.actualBufferedFramesAvailable = state.actualDeckLinkBufferedFramesAvailable;
metrics.actualBufferedFrames = state.actualDeckLinkBufferedFrames;
metrics.scheduleCallMilliseconds = state.deckLinkScheduleCallMilliseconds;
return metrics;
}
// Completion handler installed on the session by Initialize().
// Counts every completion, additionally counts late, dropped and flushed
// results, then forwards the completion to the user callback if one was given.
void DeckLinkOutput::HandleCompletion(const VideoIOCompletion& completion)
{
    ++mCompletions;

    const auto outcome = completion.result;
    if (outcome == VideoIOCompletionResult::DisplayedLate)
        ++mDisplayedLate;
    else if (outcome == VideoIOCompletionResult::Dropped)
        ++mDropped;
    else if (outcome == VideoIOCompletionResult::Flushed)
        ++mFlushed;
    // Completed, Unknown and any other result have no dedicated counter.

    if (mCompletionCallback)
        mCompletionCallback(completion);
}
}

View File

@@ -0,0 +1,61 @@
#pragma once
#include "DeckLinkSession.h"
#include "VideoIOTypes.h"
#include <atomic>
#include <cstdint>
#include <functional>
#include <string>
namespace RenderCadenceCompositor
{
// Options consumed by DeckLinkOutput::Initialize().
struct DeckLinkOutputConfig
{
bool externalKeyingEnabled = false; // passed to the session's ConfigureOutput()
bool outputAlphaRequired = false; // passed to the session's SelectPreferredFormats()
};
// Snapshot returned by DeckLinkOutput::Metrics().
struct DeckLinkOutputMetrics
{
// Counted from completion callbacks.
uint64_t completions = 0;
uint64_t displayedLate = 0;
uint64_t dropped = 0;
uint64_t flushed = 0;
// Copied from the session state.
uint64_t scheduleFailures = 0;
bool actualBufferedFramesAvailable = false; // when false, actualBufferedFrames is not meaningful
uint64_t actualBufferedFrames = 0;
double scheduleCallMilliseconds = 0.0;
};
// Thin wrapper around a DeckLinkSession for scheduled output that also keeps
// completion statistics. Non-copyable; the destructor releases the session's
// resources.
class DeckLinkOutput
{
public:
// Invoked for every completion after the internal counters were updated.
using CompletionCallback = std::function<void(const VideoIOCompletion&)>;
DeckLinkOutput() = default;
DeckLinkOutput(const DeckLinkOutput&) = delete;
DeckLinkOutput& operator=(const DeckLinkOutput&) = delete;
~DeckLinkOutput();
// Discovers devices, selects formats, configures output and prepares the
// schedule; `error` describes the failing step when false is returned.
bool Initialize(const DeckLinkOutputConfig& config, CompletionCallback completionCallback, std::string& error);
bool StartScheduledPlayback(std::string& error);
// Hands one frame to the session for scheduling.
bool ScheduleFrame(const VideoIOOutputFrame& frame);
void Stop();
void ReleaseResources();
const VideoIOState& State() const;
// Returns a snapshot of completion counters and scheduling state.
DeckLinkOutputMetrics Metrics() const;
private:
void HandleCompletion(const VideoIOCompletion& completion);
DeckLinkSession mSession;
DeckLinkOutputConfig mConfig;
CompletionCallback mCompletionCallback;
// Updated by HandleCompletion and read by Metrics(); atomic so both can run
// on different threads.
std::atomic<uint64_t> mCompletions{ 0 };
std::atomic<uint64_t> mDisplayedLate{ 0 };
std::atomic<uint64_t> mDropped{ 0 };
std::atomic<uint64_t> mFlushed{ 0 };
};
}

View File

@@ -0,0 +1,124 @@
#pragma once
#include "../frames/SystemFrameTypes.h"
#include "DeckLinkOutput.h"
#include "VideoIOTypes.h"
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <thread>
namespace RenderCadenceCompositor
{
// Tuning parameters for DeckLinkOutputThread.
struct DeckLinkOutputThreadConfig
{
// The thread stops scheduling while the exchange reports at least this many scheduled frames.
std::size_t targetBufferedFrames = 4;
// Sleep applied when the target is met, no completed frame is available, or scheduling failed.
std::chrono::milliseconds idleSleep = std::chrono::milliseconds(1);
};
// Snapshot returned by DeckLinkOutputThread::Metrics().
struct DeckLinkOutputThreadMetrics
{
uint64_t scheduledFrames = 0; // frames successfully handed to the output
uint64_t completedPollMisses = 0; // polls that found no completed frame to schedule
uint64_t scheduleFailures = 0; // ScheduleFrame() calls that returned false
};
// Worker thread that moves completed frames from a frame exchange to a
// DeckLinkOutput, keeping the number of scheduled frames near a target depth.
// The exchange type is a template parameter; it must provide Metrics(),
// ConsumeCompletedForSchedule() and ReleaseScheduledByBytes() as used below.
// Non-copyable; the destructor stops and joins the thread.
template <typename SystemFrameExchange>
class DeckLinkOutputThread
{
public:
// Keeps references to `output` and `exchange`; both must outlive this object.
DeckLinkOutputThread(DeckLinkOutput& output, SystemFrameExchange& exchange, DeckLinkOutputThreadConfig config = DeckLinkOutputThreadConfig()) :
mOutput(output),
mExchange(exchange),
mConfig(config)
{
}
DeckLinkOutputThread(const DeckLinkOutputThread&) = delete;
DeckLinkOutputThread& operator=(const DeckLinkOutputThread&) = delete;
~DeckLinkOutputThread()
{
Stop();
}
// Starts the worker thread; returns true immediately if it is already running.
bool Start()
{
if (mRunning)
return true;
mStopping = false;
mThread = std::thread([this]() { ThreadMain(); });
mRunning = true;
return true;
}
// Requests the worker to exit and joins it.
void Stop()
{
mStopping = true;
if (mThread.joinable())
mThread.join();
mRunning = false;
}
// Returns a snapshot of the atomic counters.
DeckLinkOutputThreadMetrics Metrics() const
{
DeckLinkOutputThreadMetrics metrics;
metrics.scheduledFrames = mScheduledFrames.load();
metrics.completedPollMisses = mCompletedPollMisses.load();
metrics.scheduleFailures = mScheduleFailures.load();
return metrics;
}
private:
void ThreadMain()
{
while (!mStopping)
{
// Enough frames are already scheduled: idle briefly and re-check.
const auto exchangeMetrics = mExchange.Metrics();
if (exchangeMetrics.scheduledCount >= mConfig.targetBufferedFrames)
{
std::this_thread::sleep_for(mConfig.idleSleep);
continue;
}
// Take the next completed frame; count a miss if none is available.
SystemFrame frame;
if (!mExchange.ConsumeCompletedForSchedule(frame))
{
++mCompletedPollMisses;
std::this_thread::sleep_for(mConfig.idleSleep);
continue;
}
// Describe the frame to the output. The same pointer is used for both
// `bytes` and `nativeBuffer`.
VideoIOOutputFrame outputFrame;
outputFrame.bytes = frame.bytes;
outputFrame.nativeBuffer = frame.bytes;
outputFrame.rowBytes = frame.rowBytes;
outputFrame.width = frame.width;
outputFrame.height = frame.height;
outputFrame.pixelFormat = frame.pixelFormat;
if (!mOutput.ScheduleFrame(outputFrame))
{
// Scheduling failed: hand the frame back to the exchange so its
// slot is not left marked as scheduled.
++mScheduleFailures;
mExchange.ReleaseScheduledByBytes(frame.bytes);
std::this_thread::sleep_for(mConfig.idleSleep);
continue;
}
++mScheduledFrames;
}
}
DeckLinkOutput& mOutput;
SystemFrameExchange& mExchange;
DeckLinkOutputThreadConfig mConfig;
std::thread mThread;
std::atomic<bool> mStopping{ false };
std::atomic<bool> mRunning{ false };
std::atomic<uint64_t> mScheduledFrames{ 0 };
std::atomic<uint64_t> mCompletedPollMisses{ 0 };
std::atomic<uint64_t> mScheduleFailures{ 0 };
};
}

View File

@@ -4,6 +4,14 @@ This document describes how the application currently works.
It replaces the phase-by-phase design trail as the best entry point for understanding the repo. The older phase documents remain useful history, but they mix implementation notes, experiments, and target designs. This document is organized by current runtime behavior and subsystem ownership instead.
The active plan for tightening render-thread ownership is:
- [Render Thread Ownership Plan](RENDER_THREAD_OWNERSHIP_PLAN.md)
The plan for building a fresh modular app around the proven probe architecture is:
- [New Render Cadence App Plan](NEW_RENDER_CADENCE_APP_PLAN.md)
## Application Shape
The app is a live OpenGL compositor with DeckLink input/output, runtime control services, persistent layer-stack state, live state overlays, health telemetry, and a small internal event model.

View File

@@ -0,0 +1,557 @@
# New Render Cadence App Plan
This plan describes a new application folder that rebuilds the output path from the proven `DeckLinkRenderCadenceProbe` architecture, but as a maintainable app foundation rather than a monolithic probe file.
The first goal is not to port the current compositor feature set. The first goal is to reproduce the probe's smooth 59.94/60 fps DeckLink output with clean module boundaries, tests where possible, and a structure that can later accept the shader/runtime/control systems without compromising timing.
## Working Name
Suggested folder:
```text
apps/RenderCadenceCompositor
```
Suggested executable:
```text
RenderCadenceCompositor
```
The existing app remains intact:
```text
apps/LoopThroughWithOpenGLCompositing
```
The probe remains the control sample:
```text
apps/DeckLinkRenderCadenceProbe
```
## Design Principle
The app is built around one spine:
```text
Render cadence thread
-> owns GL context
-> renders at selected frame cadence
-> performs async BGRA8 readback
-> publishes completed system-memory frames
System frame exchange
-> owns Free / Rendering / Completed / Scheduled slots
-> latest-N semantics for completed unscheduled frames
-> protects scheduled frames until DeckLink completion
DeckLink output thread
-> consumes completed frames
-> schedules to target buffer depth
-> releases scheduled frames on completion
-> never renders
```
Everything else must fit around that spine.
## Non-Negotiable Rules
- The render thread owns its GL context from initialization to shutdown.
- The render thread is driven by selected render cadence, not DeckLink demand.
- DeckLink scheduling never calls render code.
- Completion callbacks never render.
- No synchronous render request exists in the output path.
- Preview, screenshot, input upload, shader rebuild, and runtime control cannot run ahead of a due output frame.
- Completed unscheduled frames are latest-N and disposable.
- Scheduled frames are protected until DeckLink completion.
- Startup warms up real rendered frames before scheduled playback starts.
## Borrow From The Probe
Keep these behaviors from `DeckLinkRenderCadenceProbe`:
- hidden OpenGL context owned by the render thread
- simple render loop with `nextRenderTime`
- BGRA8 render target
- PBO ring readback
- non-blocking fence polling with zero timeout
- system-memory slots with `Free`, `Rendering`, `Completed`, `Scheduled`
- drop oldest completed unscheduled frame if render needs space
- DeckLink playout thread only schedules completed frames
- warmup completed frames before `StartScheduledPlayback()`
- one-line-per-second timing telemetry
## Do Not Borrow Directly
The probe is deliberately compact. Do not carry over these probe limitations into the new app:
- one huge `.cpp` file
- hard-coded output mode as permanent behavior
- render pattern, frame store, PBO logic, DeckLink playout, COM setup, and telemetry mixed together
- no reusable interfaces
- no unit-testable non-GL core
## Proposed Folder Structure
```text
apps/RenderCadenceCompositor/
README.md
RenderCadenceCompositor.cpp
app/
RenderCadenceApp.cpp
RenderCadenceApp.h
AppConfig.cpp
AppConfig.h
platform/
ComInit.cpp
ComInit.h
HiddenGlWindow.cpp
HiddenGlWindow.h
Win32Console.cpp
Win32Console.h
render/
RenderThread.cpp
RenderThread.h
RenderCadenceClock.cpp
RenderCadenceClock.h
SimpleMotionRenderer.cpp
SimpleMotionRenderer.h
Bgra8ReadbackPipeline.cpp
Bgra8ReadbackPipeline.h
PboReadbackRing.cpp
PboReadbackRing.h
frames/
SystemFrameExchange.cpp
SystemFrameExchange.h
SystemFrameTypes.h
video/
DeckLinkOutput.cpp
DeckLinkOutput.h
DeckLinkOutputThread.cpp
DeckLinkOutputThread.h
telemetry/
CadenceTelemetry.cpp
CadenceTelemetry.h
TelemetryPrinter.cpp
TelemetryPrinter.h
```
The new app can reuse selected existing source files from the current app at first:
- `videoio/decklink/DeckLinkSession.*`
- `videoio/decklink/DeckLinkDisplayMode.*`
- `videoio/decklink/DeckLinkVideoIOFormat.*`
- `videoio/decklink/DeckLinkFrameTransfer.*`
- `videoio/VideoIOFormat.*`
- `videoio/VideoIOTypes.h`
- `videoio/VideoPlayoutScheduler.*`
- `gl/renderer/GLExtensions.*`
Longer term, shared code should move into common libraries, but the first version can link these files directly to avoid a big build-system refactor.
## Module Responsibilities
### `RenderCadenceApp`
Owns top-level startup/shutdown sequencing.
Responsibilities:
- initialize COM
- discover/select DeckLink output
- create frame exchange
- start render thread
- wait for completed-frame warmup
- start DeckLink output thread
- wait for scheduled buffer warmup
- start DeckLink scheduled playback
- start telemetry printer
- stop in reverse order
It should not contain OpenGL drawing code, frame slot policy, or DeckLink scheduling loops.
### `AppConfig`
Owns runtime settings for the initial app.
Initial settings:
- output mode preference
- output width/height validation
- frame buffer capacity
- PBO depth
- warmup completed-frame count
- target DeckLink scheduled depth
- telemetry interval
Initial values should match the successful probe:
```text
systemFrameSlots = 12
pboDepth = 6
warmupFrames = 4
targetDeckLinkBufferedFrames = 4
pixelFormat = BGRA8
```
### `HiddenGlWindow`
Owns hidden Win32 window, device context, and OpenGL context creation.
Responsibilities:
- create hidden window with `CS_OWNDC`
- choose/set pixel format
- create `HGLRC`
- expose `MakeCurrent()` and `ClearCurrent()`
- destroy context/window safely
Only `RenderThread` should call `MakeCurrent()` after startup.
### `RenderThread`
Owns the render loop and GL context for its full lifetime.
Responsibilities:
- create/bind hidden GL context
- resolve GL extensions
- initialize renderer/readback pipeline
- run cadence loop
- render one frame when due
- queue PBO readback
- consume completed PBOs into `SystemFrameExchange`
- record telemetry
- destroy GL resources on the render thread
It must not:
- wait for DeckLink
- schedule DeckLink frames
- block waiting for a system frame slot when a completed unscheduled frame can be dropped instead
- accept arbitrary GL tasks ahead of output frames
### `RenderCadenceClock`
Small, testable cadence helper.
Responsibilities:
- track target frame duration
- return whether a render is due
- compute sleep duration
- detect overrun/skipped ticks
- never speed up to fill buffers
This should be unit tested without GL.
### `SimpleMotionRenderer`
First renderer only.
Responsibilities:
- render obvious smooth motion and color changes
- produce BGRA8-compatible framebuffer content
- make dropped/repeated frames visually obvious
This intentionally avoids shader-package/runtime complexity.
### `Bgra8ReadbackPipeline`
Owns output framebuffer and BGRA8 readback orchestration.
Responsibilities:
- configure render target dimensions
- render into an RGBA8/BGRA-compatible texture
- coordinate `PboReadbackRing`
- publish completed frames into `SystemFrameExchange`
### `PboReadbackRing`
Owns PBO/fence state.
Responsibilities:
- queue readback into the next free PBO slot
- poll completed fences with zero timeout
- map/copy completed PBOs into provided system-memory slots
- count PBO misses
- clean up fences/PBOs on render thread
This is GL-backed, but the state model should be small and easy to reason about.
### `SystemFrameExchange`
The central handoff between render and video.
Responsibilities:
- own system-memory frame buffers
- track slot states: `Free`, `Rendering`, `Completed`, `Scheduled`
- provide `AcquireForRender()`
- provide `PublishCompleted()`
- provide `ConsumeCompletedForSchedule()`
- provide `ReleaseScheduledByBytes()`
- drop oldest completed unscheduled frame when render needs a slot
- expose metrics
This should be unit tested heavily.
### `DeckLinkOutput`
Thin wrapper around `DeckLinkSession` for output-only use.
Responsibilities:
- discover/select output mode
- configure output callback
- prepare output schedule
- schedule app-owned system-memory frames
- start scheduled playback
- stop/release resources
- expose actual DeckLink buffered count
No input support in the first version.
### `DeckLinkOutputThread`
Owns playout scheduling loop.
Responsibilities:
- keep scheduled depth near target
- consume completed frames from `SystemFrameExchange`
- schedule them through `DeckLinkOutput`
- release frame if scheduling fails
- sleep briefly when scheduled buffer is full or no completed frame exists
It must not render.
### `CadenceTelemetry`
Owns counters, not policy.
Initial counters:
- rendered frames
- completed readback frames
- scheduled frames
- completion count
- completed-frame drops
- acquire misses
- schedule underruns
- PBO queue misses
- DeckLink late count
- DeckLink dropped count
- free/rendering/completed/scheduled slot counts
- actual DeckLink buffered frames
### `TelemetryPrinter`
Prints one stable line per interval, matching the probe where possible.
Example:
```text
renderFps=59.9 scheduleFps=59.9 free=7 completed=1 scheduled=4 drops=0 pboMiss=0 completions=119 late=0 dropped=0 decklinkBuffered=4
```
## Startup Sequence
Target first-version startup:
```text
main
-> parse AppConfig
-> initialize COM
-> DeckLinkOutput discover/select/configure output
-> DeckLinkOutput prepare output schedule
-> create SystemFrameExchange
-> start RenderThread
-> wait for completed frame warmup
-> start DeckLinkOutputThread
-> wait for scheduled depth warmup
-> DeckLinkOutput start scheduled playback
-> start TelemetryPrinter
-> wait for Enter
```
Shutdown:
```text
stop TelemetryPrinter
stop DeckLinkOutputThread
DeckLinkOutput stop playback
stop RenderThread
DeckLinkOutput release resources
release COM
```
## First Milestone: Modular Probe Equivalent
This is the only goal for the initial implementation.
Feature set:
- console app
- output-only DeckLink
- no input
- hidden GL context
- simple motion renderer
- BGRA8 only
- PBO async readback
- latest-N system-memory frame exchange
- warmup before playback
- one-line telemetry
Acceptance:
- visible DeckLink output is smooth
- `renderFps` near selected cadence
- `scheduleFps` near selected cadence
- scheduled count and DeckLink buffered count stable around 4
- no continuous late/drop count
- no continuous PBO misses
- behavior matches or exceeds `DeckLinkRenderCadenceProbe`
## Second Milestone: Testable Core
Before porting compositor features, add tests for non-GL/non-DeckLink pieces.
Test targets:
- `SystemFrameExchangeTests`
- `RenderCadenceClockTests`
- `CadenceTelemetryTests`
Important cases:
- slot lifecycle transitions
- scheduled slots are protected
- completed unscheduled frames can be dropped
- stale handles/generations are rejected
- cadence does not speed up to refill buffers
- cadence records overrun/skipped ticks
## Third Milestone: Replace Simple Renderer With Render Interface
Add an interface around frame rendering:
```text
IRenderScene
-> InitializeGl()
-> RenderFrame(frameIndex, time)
-> ShutdownGl()
```
The first implementation remains `SimpleMotionRenderer`.
This creates the insertion point for shader-package rendering later without changing timing/scheduling.
## Fourth Milestone: Begin Porting Current App Features
Port only after the modular probe equivalent is stable.
Suggested order:
1. shader package compile/load
2. render pass/layer stack drawing
3. runtime snapshot input to renderer
4. live state overlays
5. control services
6. persistence/runtime store
7. preview from system-memory frames
8. screenshot from system-memory frames
9. input capture via CPU latest-frame mailbox
Each port must preserve the rule that the render thread cadence is primary.
## What Not To Port Early
Do not port these until the output spine is proven:
- DeckLink input
- preview GL presentation
- screenshot GL readback
- HTTP/OSC control services
- shader hot reload
- persistence
- runtime state JSON/open API
- complex telemetry/event dispatch
These are useful, but they are exactly the kinds of features that can accidentally reintroduce timing coupling.
## Build Plan
Initial CMake can follow the probe pattern:
```cmake
set(RENDER_CADENCE_APP_DIR "${CMAKE_CURRENT_SOURCE_DIR}/apps/RenderCadenceCompositor")
add_executable(RenderCadenceCompositor
# selected shared DeckLink/video/gl support files
# new modular app files
)
```
Later, shared source should be split into libraries:
```text
video_shader_decklink
video_shader_videoio
video_shader_gl_support
render_cadence_core
```
Avoid doing that library split before the first modular app works.
## VS Code Launch
Add a separate launch profile:
```text
Debug RenderCadenceCompositor
```
Run it as a console app so telemetry remains visible.
## Documentation
Add:
```text
apps/RenderCadenceCompositor/README.md
```
The README should record:
- intended architecture
- build/run instructions
- expected telemetry
- test result notes
- differences from the old app
- differences from the probe
## Success Criteria Before Porting More Features
Do not start feature porting until the new app can run with:
- stable smooth DeckLink output
- stable target scheduled depth
- stable actual DeckLink buffered count
- no regular visible freezes
- no steady PBO misses
- no steadily increasing late/dropped completions
- focus/minimize changes do not affect output cadence
- clean shutdown without hangs
This gives us a clean foundation. Once this is true, every feature added later has to prove it does not damage the spine.

View File

@@ -0,0 +1,448 @@
# Render Thread Ownership Plan
This plan describes how to make the main compositor behave like the successful `DeckLinkRenderCadenceProbe`: one render cadence owner, one GL context owner, no unrelated work able to interrupt output frame production.
The goal is not just "all GL calls happen on one thread". The current app mostly does that during runtime already. The real goal is:
- the output render thread owns its GL context for its whole lifetime
- output cadence is driven by the render thread, not by DeckLink completion timing
- non-output GL work cannot sit ahead of output frames
- callers cannot block the render thread while waiting for synchronous answers
- DeckLink scheduling consumes completed system-memory frames and never causes rendering
## Current Risk Points
The current main app still has several ways to interrupt output cadence.
### Shared GL Executor
`RenderEngine` owns the GL context during runtime, but it acts as a general task executor.
The same queue/path can run:
- output frame render
- input upload
- preview present
- screenshot capture
- render resets
- shader/program commits
- resource resize
- state clearing
That means output frames are not guaranteed to be the next GL work item at the selected frame time.
### Synchronous Output Render Request
`VideoBackend` drives output production from its output producer thread, then calls:
```text
VideoBackend
-> OpenGLVideoIOBridge::RenderScheduledFrame
-> RenderEngine::RequestOutputFrame
-> TryInvokeOnRenderThread
```
That makes output production a request/response interaction. The producer waits for the render thread, and the render thread is still shared with other work.
### Input Upload Shares Output Context
DeckLink input capture currently flows into:
```text
VideoBackend::HandleInputFrame
-> OpenGLVideoIOBridge::UploadInputFrame
-> RenderEngine::QueueInputFrame
-> render thread upload
```
Even with coalescing, input upload can consume render-thread time and GPU bandwidth directly before output rendering.
### Preview And Screenshot Share Output Context
Preview and screenshot are lower-priority features, but today they still execute on the render thread.
Preview is best-effort at the caller side, but once queued it can still occupy the same context. Screenshot capture can be more expensive because it performs readback and CPU-side image preparation.
### Startup Context Ownership Is Transitional
The Win32 startup path creates and binds the GL context before `RenderEngine::StartRenderThread()`.
That is acceptable as a transitional state, but the final model should make context ownership explicit:
- bootstrap thread creates the window/context
- bootstrap thread releases it
- render thread binds it
- only render thread initializes GL resources
- only render thread destroys GL resources
### Render Callback Re-enters App State
`OpenGLRenderPipeline::RenderFrame()` calls a callback into `OpenGLComposite::renderEffect()`.
That callback builds `RenderFrameInput`, resolves frame state, drains runtime live state, and then calls back into `RenderEngine` to draw the prepared frame.
This works, but it means the output render path still reaches up into app/runtime code at frame time.
## Target Runtime Shape
The main app should match this ownership model:
```text
runtime/control threads
-> publish snapshots, live overlays, reset requests, shader-build results
-> never call GL
render cadence thread
-> sole owner of output GL context
-> wakes at selected render cadence
-> samples latest render input/state
-> renders one frame
-> queues async readback and copies completed readback into a system-memory slot
-> publishes completed frame to latest-N output buffer
video output thread
-> consumes completed system-memory frames
-> schedules DeckLink frames to target buffer depth
-> processes completion results
-> never calls GL
optional input upload path
-> writes latest input frame into CPU-side latest-frame buffer
-> render thread imports/uploads at a controlled point in its frame
preview/screenshot path
-> consumes already-rendered output/system-memory frame when possible
-> never interrupts output render cadence
```
## Non-Negotiable Rules
- The render thread never waits for DeckLink.
- DeckLink callbacks never render.
- Runtime/control threads never directly execute GL.
- Preview and screenshot never execute ahead of output frames.
- Input upload is never a separate urgent GL task ahead of output render.
- Shader/resource commits are applied only at a frame boundary.
- Telemetry on the hot path must be lock-light or try-lock only.
- The render thread cadence does not speed up to refill buffers.
- If output work overruns, the render thread records the overrun and resumes the selected cadence policy.
## Implementation Plan
### 1. Add Thread/Context Ownership Guards
Add explicit render-thread ownership checks around all GL entry points.
Deliverables:
- `RenderEngine` exposes `IsOnRenderThread()` for assertions/tests.
- GL-facing classes get debug-only owner checks where practical.
- wrong-thread GL access becomes a counted telemetry warning, not just `OutputDebugStringA`.
- tests cover that public request methods do not execute GL directly.
Acceptance:
- every `RenderEngine` public method is classified as either request-only, lifecycle-only, or render-thread-only.
- render-thread-only methods are private or guarded.
- no normal runtime caller can accidentally invoke GL work inline.
### 2. Move GL Initialization Fully Onto The Render Thread
Start the render thread before compiling shaders and initializing GL resources.
Current startup does:
```text
InitOpenGLState()
-> CompileDecodeShader
-> CompileOutputPackShader
-> InitializeResources
-> CompileLayerPrograms
StartRenderThread()
```
Move toward:
```text
create context on Win32 thread
release context on Win32 thread
StartRenderThread()
render thread binds context
render thread initializes extensions, shaders, resources
```
Deliverables:
- a single `RenderEngine::StartAndInitialize(RenderInitializationConfig)` path.
- GL extension resolution happens on the render thread.
- shader/resource initialization is a render-thread startup phase.
- `RenderEngine` destructor only destroys resources on the render thread.
Acceptance:
- after `StartRenderThread()`, no non-render thread binds or uses the app GL context.
- shutdown order is deterministic: stop video output, stop render cadence, destroy GL resources, release context.
### 3. Replace Synchronous Output Render Requests With Render-Owned Cadence
Move output cadence out of `VideoBackend` and into the render system.
Current:
```text
VideoBackend output producer
-> cadence tick
-> acquire output slot
-> synchronous render-thread request
```
Target:
```text
RenderEngine output cadence loop
-> cadence tick
-> acquire a free output slot through a non-blocking frame-sink interface
-> render frame
-> publish completed frame
```
Deliverables:
- introduce `RenderedFrameSink` or similar interface owned by video output.
- render thread pulls/claims a free system-memory slot without waiting.
- if no free slot exists, render thread drops/recycles the oldest unscheduled completed frame or records backpressure without blocking.
- remove `RenderEngine::RequestOutputFrame()` from the steady-state output path.
Acceptance:
- output rendering continues even if DeckLink completion is delayed.
- no `std::future` wait exists in the output cadence path.
- `VideoBackend` no longer owns the producer render loop; it owns scheduling/completion only.
### 4. Make The Render Thread A Frame Loop, Not A Task Queue
Keep a command mailbox, but process it only at safe frame-boundary points.
Frame loop:
```text
while running:
wait until next render timestamp
apply bounded frame-boundary commands
sample latest frame input/state
upload latest input frame if enabled and budget allows
render output frame
queue/consume readback
publish completed frame
record timings
```
Command classes:
- frame-boundary commands: reset temporal history, reset shader feedback, commit prepared shader programs
- background/low-priority commands: preview, screenshot, diagnostic readback
- non-GL commands: state publication, telemetry, persistence
Deliverables:
- replace FIFO render task queue with a priority/mailbox model.
- output cadence is the loop's main clock.
- commands have budget classes and max work per frame.
- long commands are deferred rather than blocking the current output tick.
Acceptance:
- preview/screenshot cannot run immediately before a due output frame.
- reset/shader work is applied between frames and measured.
- output render starts within a small jitter window when the GPU is not overrun.
### 5. Move Input Capture To A CPU Latest-Frame Buffer
Input capture should not enqueue independent GL upload tasks.
Target:
```text
DeckLink input callback
-> copy/coalesce latest CPU input frame
-> return quickly
render thread frame boundary
-> if input version changed, upload latest frame
-> render using last successfully uploaded input texture
```
Deliverables:
- introduce `InputFrameMailbox` with latest-frame semantics.
- remove `RenderEngine::QueueInputFrame()` from the callback path.
- render thread owns the upload moment.
- if upload would exceed budget, render thread can reuse the previous input texture and record an input-upload skip.
Acceptance:
- input capture enabled does not create arbitrary render-thread tasks.
- output cadence remains stable when input frames arrive.
- telemetry separates input-frame arrival, upload count, upload skips, and upload cost.
### 6. Move Preview To A Consumer Path
Preview should consume the latest completed output image instead of asking the output GL context to present.
Options:
- CPU preview from latest system-memory output frame.
- a separate preview GL context fed asynchronously from completed frames.
- a low-priority render-thread blit only when output has measurable slack.
Recommended first step:
- use latest system-memory BGRA8 output for the window preview.
Deliverables:
- preview reads from latest completed/scheduled output frame copy.
- `TryPresentPreview()` no longer queues GL work on the output render thread.
- preview FPS throttling remains caller-side.
Acceptance:
- forcing preview cannot delay output rendering.
- minimizing/focusing the window does not affect output cadence.
### 7. Move Screenshot To Completed Frame Capture
Screenshot should capture from the latest completed output frame unless an explicit "exact render capture" mode is requested.
Deliverables:
- screenshot request reads the latest system-memory output frame.
- PNG write remains async.
- optional diagnostic exact-GL screenshot is disabled during live output or explicitly marked disruptive.
Acceptance:
- screenshot request does not call `glReadPixels` on the output render context during steady-state playout.
### 8. Make Shader Commits Frame-Boundary Work
Prepared shader builds are CPU/background work; GL program commit is still GL work.
Deliverables:
- shader build queue produces `PreparedShaderBuild`.
- render thread sees latest pending prepared build at a frame boundary.
- commit is applied only between frames.
- expensive commits can temporarily enter a measured "render reconfigure" state.
Acceptance:
- shader commits do not interleave midway through output render.
- output timing telemetry records commit duration separately from normal render duration.
### 9. Split Output Scheduling From Rendering Completely
`VideoBackend` should become a playout/scheduling owner, not a render producer.
Target:
```text
RenderEngine
-> produces completed frames at render cadence
VideoBackend
-> schedules completed frames up to target DeckLink depth
-> processes completions
-> releases scheduled slots
```
Deliverables:
- `VideoBackend` owns `SystemOutputFramePool`, or a new `SystemFrameExchange` owns it between render/video.
- render thread publishes completed frames into the exchange.
- video output thread schedules from the exchange.
- no render calls exist in completion handling or scheduling paths.
Acceptance:
- DeckLink buffer depth changes cannot directly cause render-thread wakeups except through non-blocking availability signals.
- render cadence can be tested without DeckLink by using a fake frame sink.
- video scheduling can be tested without GL by using synthetic frames.
### 10. Preserve The Probe As The Reference Contract
The `DeckLinkRenderCadenceProbe` is now the control sample.
Deliverables:
- document which main-app components correspond to the probe components.
- add a small regression checklist:
- render FPS near target
- schedule FPS near target
- DeckLink buffered frames stable
- no late/drop frames
- no PBO misses or readback stalls
- focus/minimize does not change output cadence
Acceptance:
- after each migration step, compare the main app telemetry against the probe's known-good behavior.
## Suggested Order Of Work
1. Add ownership guards and classify render methods.
2. Move GL initialization/destruction fully onto the render thread.
3. Introduce a render-owned cadence loop behind a feature flag.
4. Add a frame-sink/exchange interface between render and video.
5. Move output production from `VideoBackend` to the render cadence loop.
6. Convert input upload to latest-frame mailbox semantics.
7. Move preview to completed-frame consumption.
8. Move screenshot to completed-frame capture.
9. Convert shader commits/resets to frame-boundary mailbox commands.
10. Remove old synchronous output render request path.
## Feature Flags During Migration
Use flags only to keep testing safe, not as long-term compatibility layers.
Suggested flags:
```text
VST_RENDER_CADENCE_OWNER=render_thread
VST_DISABLE_INPUT_CAPTURE=1
VST_PREVIEW_SOURCE=system_frame
VST_SCREENSHOT_SOURCE=system_frame
```
Remove each flag once the new behavior is proven and becomes the only supported path.
## Telemetry Needed
Add or preserve counters for:
- render tick jitter
- render tick overrun
- output render duration
- GL command mailbox depth by class
- frame-boundary command duration
- input upload duration and skips
- readback queue/consume duration
- completed system-memory frame depth
- scheduled DeckLink frame depth
- DeckLink actual buffered frames
- preview frames consumed
- screenshot requests served from system memory
The key metric is whether output render starts on time. Buffer depth alone is not enough; a full buffer can still contain stale or repeated frames.
## Completion Definition
This work is complete when:
- the output render thread owns the app GL context from initialization through shutdown
- output rendering is driven by the render thread's selected frame cadence
- no non-output task can run ahead of a due output frame
- `VideoBackend` never asks the render thread to render synchronously
- DeckLink scheduling consumes already completed system-memory frames
- input upload, preview, screenshot, shader commits, and resets are all frame-boundary, mailbox, or consumer-side operations
- main-app telemetry approaches the cadence probe behavior under the same output mode

View File

@@ -0,0 +1,176 @@
#include "SystemFrameExchange.h"
#include <chrono>
#include <cstdint>
#include <iostream>
namespace
{
// Number of expectations that have failed so far in this test executable.
int gFailures = 0;

// Minimal assertion helper for this test file.
//
// When `condition` is false, writes "FAIL: <message>" to stderr and bumps
// gFailures. A passing expectation has no side effects. Execution always
// continues, so one failing check never hides the checks that follow it.
void Expect(bool condition, const char* message)
{
    if (!condition)
    {
        std::cerr << "FAIL: " << message << "\n";
        ++gFailures;
    }
}
// Builds the exchange configuration shared by most tests: a 4x3 BGRA8 frame
// with a caller-selected number of slots (two by default). Row bytes are left
// at the config's default value.
SystemFrameExchangeConfig MakeConfig(std::size_t capacity = 2)
{
    SystemFrameExchangeConfig settings;
    settings.capacity = capacity;
    settings.pixelFormat = VideoIOPixelFormat::Bgra8;
    settings.width = 4;
    settings.height = 3;
    return settings;
}
void TestAcquirePublishesAndSchedules()
{
SystemFrameExchange exchange(MakeConfig(1));
SystemFrame frame;
Expect(exchange.AcquireForRender(frame), "frame can be acquired for render");
Expect(frame.bytes != nullptr, "acquired frame has storage");
Expect(frame.width == 4, "frame width is configured");
Expect(frame.height == 3, "frame height is configured");
Expect(frame.rowBytes == 16, "BGRA8 row bytes are inferred");
Expect(frame.pixelFormat == VideoIOPixelFormat::Bgra8, "pixel format is configured");
frame.frameIndex = 42;
Expect(exchange.PublishCompleted(frame), "rendering frame can be completed");
Expect(exchange.WaitForCompletedDepth(1, std::chrono::milliseconds(0)), "completed depth can be observed");
SystemFrame scheduled;
Expect(exchange.ConsumeCompletedForSchedule(scheduled), "completed frame can be scheduled");
Expect(scheduled.index == frame.index, "scheduled frame uses completed slot");
Expect(scheduled.generation == frame.generation, "scheduled frame keeps generation");
Expect(scheduled.frameIndex == 42, "frame index is preserved");
Expect(exchange.ReleaseScheduledByBytes(scheduled.bytes), "scheduled frame can be released by bytes");
SystemFrameExchangeMetrics metrics = exchange.Metrics();
Expect(metrics.freeCount == 1, "released slot returns to free");
Expect(metrics.completedFrames == 1, "completed metric is counted");
Expect(metrics.scheduledFrames == 1, "scheduled metric is counted");
}
void TestAcquireDropsOldestCompletedUnscheduled()
{
SystemFrameExchange exchange(MakeConfig(2));
SystemFrame first;
SystemFrame second;
SystemFrame third;
Expect(exchange.AcquireForRender(first), "first frame can be acquired");
first.frameIndex = 1;
Expect(exchange.PublishCompleted(first), "first frame can be completed");
Expect(exchange.AcquireForRender(second), "second frame can be acquired");
second.frameIndex = 2;
Expect(exchange.PublishCompleted(second), "second frame can be completed");
Expect(exchange.AcquireForRender(third), "third acquire drops the oldest completed frame");
Expect(third.index == first.index, "oldest completed slot is reused");
SystemFrame scheduled;
Expect(exchange.ConsumeCompletedForSchedule(scheduled), "remaining completed frame can be scheduled");
Expect(scheduled.index == second.index, "newer completed frame survives drop");
Expect(scheduled.frameIndex == 2, "newer frame index survives drop");
SystemFrameExchangeMetrics metrics = exchange.Metrics();
Expect(metrics.completedDrops == 1, "drop metric is counted");
Expect(metrics.renderingCount == 1, "reused slot is rendering");
Expect(metrics.scheduledCount == 1, "consumed slot is scheduled");
}
void TestScheduledFramesAreNotDropped()
{
SystemFrameExchange exchange(MakeConfig(1));
SystemFrame frame;
Expect(exchange.AcquireForRender(frame), "single frame can be acquired");
Expect(exchange.PublishCompleted(frame), "single frame can be completed");
SystemFrame scheduled;
Expect(exchange.ConsumeCompletedForSchedule(scheduled), "single frame can be scheduled");
SystemFrame extra;
Expect(!exchange.AcquireForRender(extra), "scheduled frame is not dropped for render acquire");
SystemFrameExchangeMetrics metrics = exchange.Metrics();
Expect(metrics.acquireMisses == 1, "blocked acquire miss is counted");
Expect(metrics.completedDrops == 0, "scheduled frame is not counted as a completed drop");
}
void TestGenerationValidationRejectsStaleFrames()
{
SystemFrameExchange exchange(MakeConfig(1));
SystemFrame first;
Expect(exchange.AcquireForRender(first), "frame can be acquired");
Expect(exchange.PublishCompleted(first), "frame can be completed");
SystemFrame scheduled;
Expect(exchange.ConsumeCompletedForSchedule(scheduled), "frame can be scheduled");
Expect(exchange.ReleaseScheduledByBytes(scheduled.bytes), "frame can be released");
SystemFrame second;
Expect(exchange.AcquireForRender(second), "slot can be reacquired");
Expect(second.index == first.index, "same slot is reused");
Expect(second.generation != first.generation, "reacquire invalidates stale generation");
Expect(!exchange.PublishCompleted(first), "stale frame cannot be completed");
}
void TestPixelFormatAwareSizing()
{
SystemFrameExchangeConfig config;
config.width = 7;
config.height = 2;
config.pixelFormat = VideoIOPixelFormat::V210;
config.capacity = 1;
SystemFrameExchange exchange(config);
SystemFrame frame;
Expect(exchange.AcquireForRender(frame), "v210 frame can be acquired");
Expect(frame.pixelFormat == VideoIOPixelFormat::V210, "v210 pixel format is preserved");
Expect(frame.rowBytes == static_cast<long>(VideoIORowBytes(VideoIOPixelFormat::V210, 7)), "v210 row bytes are inferred");
config.pixelFormat = VideoIOPixelFormat::Uyvy8;
config.rowBytes = 64;
exchange.Configure(config);
Expect(exchange.AcquireForRender(frame), "explicit row-byte frame can be acquired");
Expect(frame.pixelFormat == VideoIOPixelFormat::Uyvy8, "reconfigured pixel format is preserved");
Expect(frame.rowBytes == 64, "explicit row bytes are preserved");
}
void TestCompletedPollMissIsCounted()
{
SystemFrameExchange exchange(MakeConfig(1));
SystemFrame frame;
Expect(!exchange.ConsumeCompletedForSchedule(frame), "empty completed queue cannot be consumed");
SystemFrameExchangeMetrics metrics = exchange.Metrics();
Expect(metrics.completedPollMisses == 1, "completed poll miss is counted");
}
}
// Test-runner entry point: executes every frame exchange scenario in order and
// turns the gFailures counter into the process exit code (0 = pass, 1 = fail).
int main()
{
    // NOTE(review): Expect() is presumed to record failures in gFailures rather
    // than abort, so all scenarios run even after a failure -- confirm against Expect.
    TestAcquirePublishesAndSchedules();
    TestAcquireDropsOldestCompletedUnscheduled();
    TestScheduledFramesAreNotDropped();
    TestGenerationValidationRejectsStaleFrames();
    TestPixelFormatAwareSizing();
    TestCompletedPollMissIsCounted();

    const bool allPassed = (gFailures == 0);
    if (allPassed)
    {
        std::cout << "RenderCadenceCompositor frame exchange tests passed.\n";
        return 0;
    }

    // Report the failure count on stderr and signal failure to the caller.
    std::cerr << gFailures << " frame exchange test failure(s).\n";
    return 1;
}