Improvement
All checks were successful
CI / React UI Build (push) Successful in 11s
CI / Native Windows Build And Tests (push) Successful in 2m52s
CI / Windows Release Package (push) Successful in 3m0s

This commit is contained in:
Aiden
2026-05-12 00:00:23 +10:00
parent a434a88108
commit 9e3412712c
22 changed files with 1409 additions and 34 deletions

View File

@@ -172,6 +172,8 @@ set(APP_SOURCES
"${APP_DIR}/videoio/OutputProductionController.h" "${APP_DIR}/videoio/OutputProductionController.h"
"${APP_DIR}/videoio/RenderOutputQueue.cpp" "${APP_DIR}/videoio/RenderOutputQueue.cpp"
"${APP_DIR}/videoio/RenderOutputQueue.h" "${APP_DIR}/videoio/RenderOutputQueue.h"
"${APP_DIR}/videoio/SystemOutputFramePool.cpp"
"${APP_DIR}/videoio/SystemOutputFramePool.h"
"${APP_DIR}/videoio/VideoPlayoutPolicy.h" "${APP_DIR}/videoio/VideoPlayoutPolicy.h"
"${APP_DIR}/videoio/VideoPlayoutScheduler.cpp" "${APP_DIR}/videoio/VideoPlayoutScheduler.cpp"
"${APP_DIR}/videoio/VideoPlayoutScheduler.h" "${APP_DIR}/videoio/VideoPlayoutScheduler.h"
@@ -578,6 +580,24 @@ endif()
add_test(NAME RenderOutputQueueTests COMMAND RenderOutputQueueTests) add_test(NAME RenderOutputQueueTests COMMAND RenderOutputQueueTests)
# Standalone test executable for SystemOutputFramePool: compiles the pool source and
# VideoIOFormat.cpp directly alongside the test file rather than linking the application.
add_executable(SystemOutputFramePoolTests
"${APP_DIR}/videoio/SystemOutputFramePool.cpp"
"${APP_DIR}/videoio/VideoIOFormat.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/tests/SystemOutputFramePoolTests.cpp"
)
# Header search paths used by the sources listed above.
target_include_directories(SystemOutputFramePoolTests PRIVATE
"${APP_DIR}"
"${APP_DIR}/videoio"
"${APP_DIR}/videoio/decklink"
)
# Warning level 3 is applied only when building with MSVC.
if(MSVC)
target_compile_options(SystemOutputFramePoolTests PRIVATE /W3)
endif()
# Register the executable with CTest under the same name.
add_test(NAME SystemOutputFramePoolTests COMMAND SystemOutputFramePoolTests)
add_executable(VideoBackendLifecycleTests add_executable(VideoBackendLifecycleTests
"${APP_DIR}/videoio/VideoBackendLifecycle.cpp" "${APP_DIR}/videoio/VideoBackendLifecycle.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/tests/VideoBackendLifecycleTests.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/tests/VideoBackendLifecycleTests.cpp"

View File

@@ -25,7 +25,8 @@ OpenGLRenderPipeline::OpenGLRenderPipeline(
mRenderEffect(renderEffect), mRenderEffect(renderEffect),
mOutputReady(outputReady), mOutputReady(outputReady),
mPaint(paint), mPaint(paint),
mOutputReadbackMode(ReadOutputReadbackModeFromEnvironment()) mOutputReadbackMode(ReadOutputReadbackModeFromEnvironment()),
mAsyncReadbackDepth(ReadAsyncReadbackDepthFromEnvironment())
{ {
} }
@@ -47,7 +48,9 @@ bool OpenGLRenderPipeline::RenderFrame(const RenderPipelineFrameContext& context
glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.OutputFramebuffer()); glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.OutputFramebuffer());
if (mOutputReady) if (mOutputReady)
mOutputReady(); mOutputReady();
if (state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10) if (state.outputPixelFormat == VideoIOPixelFormat::Bgra8)
PackOutputForBgra8(state);
else if (state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10)
PackOutputFor10Bit(state); PackOutputFor10Bit(state);
glFlush(); glFlush();
@@ -76,6 +79,24 @@ bool OpenGLRenderPipeline::RenderFrame(const RenderPipelineFrameContext& context
return true; return true;
} }
// Blits the colour contents of the renderer's output framebuffer into its output pack
// framebuffer. Source and destination rectangles are identical (the full output frame), so the
// copy is unscaled. On return the pack framebuffer is bound as GL_FRAMEBUFFER.
void OpenGLRenderPipeline::PackOutputForBgra8(const VideoIOState& state)
{
    const auto frameWidth = state.outputFrameSize.width;
    const auto frameHeight = state.outputFrameSize.height;

    // Read from the rendered output, draw into the pack target.
    glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputFramebuffer());
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, mRenderer.OutputPackFramebuffer());

    glBlitFramebuffer(
        0, 0, frameWidth, frameHeight,
        0, 0, frameWidth, frameHeight,
        GL_COLOR_BUFFER_BIT,
        GL_NEAREST);

    // Rebind both read and draw targets to the pack framebuffer.
    glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.OutputPackFramebuffer());
}
void OpenGLRenderPipeline::PackOutputFor10Bit(const VideoIOState& state) void OpenGLRenderPipeline::PackOutputFor10Bit(const VideoIOState& state)
{ {
glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.OutputPackFramebuffer()); glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.OutputPackFramebuffer());
@@ -109,11 +130,17 @@ bool OpenGLRenderPipeline::EnsureAsyncReadbackBuffers(std::size_t requiredBytes)
if (requiredBytes == 0) if (requiredBytes == 0)
return false; return false;
if (mAsyncReadbackBytes == requiredBytes && mAsyncReadbackSlots[0].pixelPackBuffer != 0) if (mAsyncReadbackBytes == requiredBytes &&
mAsyncReadbackSlots.size() == mAsyncReadbackDepth &&
!mAsyncReadbackSlots.empty() &&
mAsyncReadbackSlots[0].pixelPackBuffer != 0)
{
return true; return true;
}
ResetAsyncReadbackState(); ResetAsyncReadbackState();
mAsyncReadbackBytes = requiredBytes; mAsyncReadbackBytes = requiredBytes;
mAsyncReadbackSlots.resize(mAsyncReadbackDepth);
for (AsyncReadbackSlot& slot : mAsyncReadbackSlots) for (AsyncReadbackSlot& slot : mAsyncReadbackSlots)
{ {
glGenBuffers(1, &slot.pixelPackBuffer); glGenBuffers(1, &slot.pixelPackBuffer);
@@ -134,7 +161,7 @@ void OpenGLRenderPipeline::ResetAsyncReadbackState()
for (AsyncReadbackSlot& slot : mAsyncReadbackSlots) for (AsyncReadbackSlot& slot : mAsyncReadbackSlots)
slot.sizeBytes = 0; slot.sizeBytes = 0;
if (mAsyncReadbackSlots[0].pixelPackBuffer != 0) if (!mAsyncReadbackSlots.empty() && mAsyncReadbackSlots[0].pixelPackBuffer != 0)
{ {
for (AsyncReadbackSlot& slot : mAsyncReadbackSlots) for (AsyncReadbackSlot& slot : mAsyncReadbackSlots)
{ {
@@ -149,6 +176,7 @@ void OpenGLRenderPipeline::ResetAsyncReadbackState()
mAsyncReadbackWriteIndex = 0; mAsyncReadbackWriteIndex = 0;
mAsyncReadbackReadIndex = 0; mAsyncReadbackReadIndex = 0;
mAsyncReadbackBytes = 0; mAsyncReadbackBytes = 0;
mAsyncReadbackSlots.clear();
} }
void OpenGLRenderPipeline::FlushAsyncReadbackPipeline() void OpenGLRenderPipeline::FlushAsyncReadbackPipeline()
@@ -170,12 +198,14 @@ void OpenGLRenderPipeline::FlushAsyncReadbackPipeline()
bool OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state, OutputReadbackTiming& timing) bool OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state, OutputReadbackTiming& timing)
{ {
const auto queueStartTime = std::chrono::steady_clock::now(); const auto queueStartTime = std::chrono::steady_clock::now();
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10; const bool useTenBitPackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 ||
state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
const bool usePackFramebuffer = state.outputPixelFormat == VideoIOPixelFormat::Bgra8 || useTenBitPackedOutput;
const std::size_t requiredBytes = static_cast<std::size_t>(state.outputFrameRowBytes) * state.outputFrameSize.height; const std::size_t requiredBytes = static_cast<std::size_t>(state.outputFrameRowBytes) * state.outputFrameSize.height;
const GLenum format = usePackedOutput ? GL_RGBA : GL_BGRA; const GLenum format = useTenBitPackedOutput ? GL_RGBA : GL_BGRA;
const GLenum type = usePackedOutput ? GL_UNSIGNED_BYTE : GL_UNSIGNED_INT_8_8_8_8_REV; const GLenum type = useTenBitPackedOutput ? GL_UNSIGNED_BYTE : GL_UNSIGNED_INT_8_8_8_8_REV;
const GLuint framebuffer = usePackedOutput ? mRenderer.OutputPackFramebuffer() : mRenderer.OutputFramebuffer(); const GLuint framebuffer = usePackFramebuffer ? mRenderer.OutputPackFramebuffer() : mRenderer.OutputFramebuffer();
const GLsizei readWidth = static_cast<GLsizei>(usePackedOutput ? state.outputPackTextureWidth : state.outputFrameSize.width); const GLsizei readWidth = static_cast<GLsizei>(useTenBitPackedOutput ? state.outputPackTextureWidth : state.outputFrameSize.width);
const GLsizei readHeight = static_cast<GLsizei>(state.outputFrameSize.height); const GLsizei readHeight = static_cast<GLsizei>(state.outputFrameSize.height);
const auto finishTiming = [&timing, queueStartTime]() { const auto finishTiming = [&timing, queueStartTime]() {
@@ -204,6 +234,12 @@ bool OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state, OutputR
} }
} }
if (mAsyncReadbackSlots.empty())
{
finishTiming();
return false;
}
AsyncReadbackSlot& slot = mAsyncReadbackSlots[mAsyncReadbackWriteIndex]; AsyncReadbackSlot& slot = mAsyncReadbackSlots[mAsyncReadbackWriteIndex];
if (slot.inFlight) if (slot.inFlight)
{ {
@@ -321,13 +357,17 @@ void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& stat
{ {
const auto readStartTime = std::chrono::steady_clock::now(); const auto readStartTime = std::chrono::steady_clock::now();
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10; const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
const bool usePackFramebuffer = state.outputPixelFormat == VideoIOPixelFormat::Bgra8 || usePackedOutput;
glPixelStorei(GL_PACK_ALIGNMENT, 4); glPixelStorei(GL_PACK_ALIGNMENT, 4);
glPixelStorei(GL_PACK_ROW_LENGTH, 0); glPixelStorei(GL_PACK_ROW_LENGTH, 0);
if (usePackedOutput) if (usePackFramebuffer)
{ {
glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputPackFramebuffer()); glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputPackFramebuffer());
glReadPixels(0, 0, state.outputPackTextureWidth, state.outputFrameSize.height, GL_RGBA, GL_UNSIGNED_BYTE, destinationBytes); if (usePackedOutput)
glReadPixels(0, 0, state.outputPackTextureWidth, state.outputFrameSize.height, GL_RGBA, GL_UNSIGNED_BYTE, destinationBytes);
else
glReadPixels(0, 0, state.outputFrameSize.width, state.outputFrameSize.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, destinationBytes);
} }
else else
{ {
@@ -366,15 +406,19 @@ OpenGLRenderPipeline::OutputReadbackTiming OpenGLRenderPipeline::ReadOutputFrame
return timing; return timing;
} }
if (TryConsumeAsyncReadback(outputFrame, 500000, timing)) if (TryConsumeAsyncReadback(outputFrame, 0, timing))
{ {
(void)QueueAsyncReadback(state, timing); (void)QueueAsyncReadback(state, timing);
return timing; return timing;
} }
const bool queued = QueueAsyncReadback(state, timing);
if (queued && TryConsumeAsyncReadback(outputFrame, 0, timing))
return timing;
if (TryCopyCachedOutputFrame(outputFrame, timing)) if (TryCopyCachedOutputFrame(outputFrame, timing))
{ {
(void)QueueAsyncReadback(state, timing);
return timing; return timing;
} }
@@ -386,8 +430,8 @@ OpenGLRenderPipeline::OutputReadbackTiming OpenGLRenderPipeline::ReadOutputFrame
CacheOutputFrame(outputFrame); CacheOutputFrame(outputFrame);
} }
FlushAsyncReadbackPipeline(); if (!queued)
(void)QueueAsyncReadback(state, timing); (void)QueueAsyncReadback(state, timing);
return timing; return timing;
} }
@@ -409,3 +453,27 @@ OpenGLRenderPipeline::OutputReadbackMode OpenGLRenderPipeline::ReadOutputReadbac
return OutputReadbackMode::AsyncPbo; return OutputReadbackMode::AsyncPbo;
} }
// Reads the async readback slot count from the VST_OUTPUT_READBACK_DEPTH environment variable.
//
// Returns:
//   - the default depth (6) when the variable is unset or cannot be parsed as a number;
//   - the parsed value clamped to the inclusive range [3, 12] otherwise.
//
// Negative values are treated as below the minimum and clamp to 3.
std::size_t OpenGLRenderPipeline::ReadAsyncReadbackDepthFromEnvironment()
{
    constexpr std::size_t kDefaultDepth = 6;
    constexpr std::size_t kMinimumDepth = 3;
    constexpr std::size_t kMaximumDepth = 12;

    char* rawValue = nullptr;
    std::size_t rawValueSize = 0;
    if (_dupenv_s(&rawValue, &rawValueSize, "VST_OUTPUT_READBACK_DEPTH") != 0 || rawValue == nullptr)
        return kDefaultDepth;

    // _dupenv_s hands back an allocated copy; take ownership of the text and free it immediately.
    const std::string value(rawValue);
    std::free(rawValue);

    try
    {
        const unsigned long requestedDepth = std::stoul(value);

        // std::stoul accepts a leading '-' and returns the negated value wrapped into the
        // unsigned range, so "-1" would otherwise look like a huge depth and clamp to the
        // maximum. Detect the sign explicitly and clamp to the minimum instead.
        const std::size_t firstCharacter = value.find_first_not_of(" \t\n\v\f\r");
        const bool isNegative = firstCharacter != std::string::npos && value[firstCharacter] == '-';

        if (isNegative || requestedDepth < kMinimumDepth)
            return kMinimumDepth;
        if (requestedDepth > kMaximumDepth)
            return kMaximumDepth;
        return static_cast<std::size_t>(requestedDepth);
    }
    catch (...)
    {
        // std::stoul throws on non-numeric or out-of-range text; fall back to the default.
        return kDefaultDepth;
    }
}

View File

@@ -3,7 +3,6 @@
#include "GLExtensions.h" #include "GLExtensions.h"
#include "VideoIOTypes.h" #include "VideoIOTypes.h"
#include <array>
#include <functional> #include <functional>
#include <vector> #include <vector>
@@ -76,9 +75,11 @@ private:
void CacheOutputFrame(const VideoIOOutputFrame& outputFrame); void CacheOutputFrame(const VideoIOOutputFrame& outputFrame);
bool TryCopyCachedOutputFrame(VideoIOOutputFrame& outputFrame, OutputReadbackTiming& timing) const; bool TryCopyCachedOutputFrame(VideoIOOutputFrame& outputFrame, OutputReadbackTiming& timing) const;
void ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes, OutputReadbackTiming& timing); void ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes, OutputReadbackTiming& timing);
void PackOutputForBgra8(const VideoIOState& state);
void PackOutputFor10Bit(const VideoIOState& state); void PackOutputFor10Bit(const VideoIOState& state);
OutputReadbackTiming ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame); OutputReadbackTiming ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame);
static OutputReadbackMode ReadOutputReadbackModeFromEnvironment(); static OutputReadbackMode ReadOutputReadbackModeFromEnvironment();
static std::size_t ReadAsyncReadbackDepthFromEnvironment();
OpenGLRenderer& mRenderer; OpenGLRenderer& mRenderer;
RuntimeSnapshotProvider& mRuntimeSnapshotProvider; RuntimeSnapshotProvider& mRuntimeSnapshotProvider;
@@ -87,7 +88,8 @@ private:
OutputReadyCallback mOutputReady; OutputReadyCallback mOutputReady;
PaintCallback mPaint; PaintCallback mPaint;
OutputReadbackMode mOutputReadbackMode = OutputReadbackMode::AsyncPbo; OutputReadbackMode mOutputReadbackMode = OutputReadbackMode::AsyncPbo;
std::array<AsyncReadbackSlot, 3> mAsyncReadbackSlots; std::vector<AsyncReadbackSlot> mAsyncReadbackSlots;
std::size_t mAsyncReadbackDepth = 0;
std::size_t mAsyncReadbackWriteIndex = 0; std::size_t mAsyncReadbackWriteIndex = 0;
std::size_t mAsyncReadbackReadIndex = 0; std::size_t mAsyncReadbackReadIndex = 0;
std::size_t mAsyncReadbackBytes = 0; std::size_t mAsyncReadbackBytes = 0;

View File

@@ -91,6 +91,16 @@ JsonValue RuntimeStatePresenter::BuildRuntimeStateValue(const RuntimeStore& runt
readyQueue.set("droppedCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.readyQueueDroppedCount))); readyQueue.set("droppedCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.readyQueueDroppedCount)));
readyQueue.set("underrunCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.readyQueueUnderrunCount))); readyQueue.set("underrunCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.readyQueueUnderrunCount)));
JsonValue systemMemory = JsonValue::MakeObject();
systemMemory.set("freeFrameCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.systemFramePoolFree)));
systemMemory.set("readyFrameCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.systemFramePoolReady)));
systemMemory.set("scheduledFrameCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.systemFramePoolScheduled)));
systemMemory.set("underrunCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.systemFrameUnderrunCount)));
systemMemory.set("repeatCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.systemFrameRepeatCount)));
systemMemory.set("dropCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.systemFrameDropCount)));
systemMemory.set("ageAtScheduleMs", JsonValue(telemetrySnapshot.backendPlayout.systemFrameAgeAtScheduleMilliseconds));
systemMemory.set("ageAtCompletionMs", JsonValue(telemetrySnapshot.backendPlayout.systemFrameAgeAtCompletionMilliseconds));
JsonValue outputRender = JsonValue::MakeObject(); JsonValue outputRender = JsonValue::MakeObject();
outputRender.set("renderMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderMilliseconds)); outputRender.set("renderMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderMilliseconds));
outputRender.set("smoothedRenderMs", JsonValue(telemetrySnapshot.backendPlayout.smoothedOutputRenderMilliseconds)); outputRender.set("smoothedRenderMs", JsonValue(telemetrySnapshot.backendPlayout.smoothedOutputRenderMilliseconds));
@@ -132,6 +142,7 @@ JsonValue RuntimeStatePresenter::BuildRuntimeStateValue(const RuntimeStore& runt
backendPlayout.set("droppedFrameCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.droppedFrameCount))); backendPlayout.set("droppedFrameCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.droppedFrameCount)));
backendPlayout.set("flushedFrameCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.flushedFrameCount))); backendPlayout.set("flushedFrameCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.flushedFrameCount)));
backendPlayout.set("readyQueue", readyQueue); backendPlayout.set("readyQueue", readyQueue);
backendPlayout.set("systemMemory", systemMemory);
backendPlayout.set("outputRender", outputRender); backendPlayout.set("outputRender", outputRender);
backendPlayout.set("recovery", recovery); backendPlayout.set("recovery", recovery);
root.set("backendPlayout", backendPlayout); root.set("backendPlayout", backendPlayout);

View File

@@ -313,6 +313,40 @@ bool HealthTelemetry::TryRecordOutputRenderQueueWait(double queueWaitMillisecond
return true; return true;
} }
// Stores the latest system-memory frame pool counters and frame ages in the backend playout
// telemetry. Blocks until the telemetry mutex is available. Frame ages below zero are stored
// as 0.0.
void HealthTelemetry::RecordSystemMemoryPlayoutStats(std::size_t freeFrameCount, std::size_t readyFrameCount,
    std::size_t scheduledFrameCount, uint64_t underrunCount, uint64_t repeatCount, uint64_t dropCount,
    double frameAgeAtScheduleMilliseconds, double frameAgeAtCompletionMilliseconds)
{
    // Floor the ages before taking the lock; this touches no shared state.
    const double ageAtSchedule = std::max(frameAgeAtScheduleMilliseconds, 0.0);
    const double ageAtCompletion = std::max(frameAgeAtCompletionMilliseconds, 0.0);

    std::lock_guard<std::mutex> guard(mMutex);
    auto& playout = mBackendPlayout;

    // Pool occupancy.
    playout.systemFramePoolFree = freeFrameCount;
    playout.systemFramePoolReady = readyFrameCount;
    playout.systemFramePoolScheduled = scheduledFrameCount;

    // Event counters.
    playout.systemFrameUnderrunCount = underrunCount;
    playout.systemFrameRepeatCount = repeatCount;
    playout.systemFrameDropCount = dropCount;

    // Frame ages.
    playout.systemFrameAgeAtScheduleMilliseconds = ageAtSchedule;
    playout.systemFrameAgeAtCompletionMilliseconds = ageAtCompletion;
}
// Non-blocking variant of RecordSystemMemoryPlayoutStats: attempts to take the telemetry mutex
// without waiting. Returns false and records nothing when the mutex is already held; otherwise
// stores the counters and ages (ages below zero are stored as 0.0) and returns true.
bool HealthTelemetry::TryRecordSystemMemoryPlayoutStats(std::size_t freeFrameCount, std::size_t readyFrameCount,
    std::size_t scheduledFrameCount, uint64_t underrunCount, uint64_t repeatCount, uint64_t dropCount,
    double frameAgeAtScheduleMilliseconds, double frameAgeAtCompletionMilliseconds)
{
    std::unique_lock<std::mutex> guard(mMutex, std::try_to_lock);
    if (!guard)
        return false;

    auto& playout = mBackendPlayout;

    // Pool occupancy.
    playout.systemFramePoolFree = freeFrameCount;
    playout.systemFramePoolReady = readyFrameCount;
    playout.systemFramePoolScheduled = scheduledFrameCount;

    // Event counters.
    playout.systemFrameUnderrunCount = underrunCount;
    playout.systemFrameRepeatCount = repeatCount;
    playout.systemFrameDropCount = dropCount;

    // Frame ages, floored at zero.
    playout.systemFrameAgeAtScheduleMilliseconds = std::max(frameAgeAtScheduleMilliseconds, 0.0);
    playout.systemFrameAgeAtCompletionMilliseconds = std::max(frameAgeAtCompletionMilliseconds, 0.0);
    return true;
}
void HealthTelemetry::RecordOutputRenderPipelineTiming( void HealthTelemetry::RecordOutputRenderPipelineTiming(
double drawMilliseconds, double drawMilliseconds,
double fenceWaitMilliseconds, double fenceWaitMilliseconds,

View File

@@ -94,6 +94,14 @@ public:
uint64_t readyQueuePoppedCount = 0; uint64_t readyQueuePoppedCount = 0;
uint64_t readyQueueDroppedCount = 0; uint64_t readyQueueDroppedCount = 0;
uint64_t readyQueueUnderrunCount = 0; uint64_t readyQueueUnderrunCount = 0;
std::size_t systemFramePoolFree = 0;
std::size_t systemFramePoolReady = 0;
std::size_t systemFramePoolScheduled = 0;
uint64_t systemFrameUnderrunCount = 0;
uint64_t systemFrameRepeatCount = 0;
uint64_t systemFrameDropCount = 0;
double systemFrameAgeAtScheduleMilliseconds = 0.0;
double systemFrameAgeAtCompletionMilliseconds = 0.0;
double outputRenderMilliseconds = 0.0; double outputRenderMilliseconds = 0.0;
double smoothedOutputRenderMilliseconds = 0.0; double smoothedOutputRenderMilliseconds = 0.0;
double maxOutputRenderMilliseconds = 0.0; double maxOutputRenderMilliseconds = 0.0;
@@ -198,6 +206,13 @@ public:
void RecordOutputRenderQueueWait(double queueWaitMilliseconds); void RecordOutputRenderQueueWait(double queueWaitMilliseconds);
bool TryRecordOutputRenderQueueWait(double queueWaitMilliseconds); bool TryRecordOutputRenderQueueWait(double queueWaitMilliseconds);
void RecordSystemMemoryPlayoutStats(std::size_t freeFrameCount, std::size_t readyFrameCount,
std::size_t scheduledFrameCount, uint64_t underrunCount, uint64_t repeatCount, uint64_t dropCount,
double frameAgeAtScheduleMilliseconds, double frameAgeAtCompletionMilliseconds);
bool TryRecordSystemMemoryPlayoutStats(std::size_t freeFrameCount, std::size_t readyFrameCount,
std::size_t scheduledFrameCount, uint64_t underrunCount, uint64_t repeatCount, uint64_t dropCount,
double frameAgeAtScheduleMilliseconds, double frameAgeAtCompletionMilliseconds);
void RecordOutputRenderPipelineTiming( void RecordOutputRenderPipelineTiming(
double drawMilliseconds, double drawMilliseconds,
double fenceWaitMilliseconds, double fenceWaitMilliseconds,

View File

@@ -11,6 +11,7 @@ void RenderOutputQueue::Configure(const VideoPlayoutPolicy& policy)
mPolicy = NormalizeVideoPlayoutPolicy(policy); mPolicy = NormalizeVideoPlayoutPolicy(policy);
while (mReadyFrames.size() > CapacityLocked()) while (mReadyFrames.size() > CapacityLocked())
{ {
ReleaseFrame(mReadyFrames.front());
mReadyFrames.pop_front(); mReadyFrames.pop_front();
++mDroppedCount; ++mDroppedCount;
} }
@@ -21,6 +22,7 @@ bool RenderOutputQueue::Push(RenderOutputFrame frame)
std::lock_guard<std::mutex> lock(mMutex); std::lock_guard<std::mutex> lock(mMutex);
if (mReadyFrames.size() >= CapacityLocked()) if (mReadyFrames.size() >= CapacityLocked())
{ {
ReleaseFrame(mReadyFrames.front());
mReadyFrames.pop_front(); mReadyFrames.pop_front();
++mDroppedCount; ++mDroppedCount;
} }
@@ -48,6 +50,8 @@ bool RenderOutputQueue::TryPop(RenderOutputFrame& frame)
void RenderOutputQueue::Clear() void RenderOutputQueue::Clear()
{ {
std::lock_guard<std::mutex> lock(mMutex); std::lock_guard<std::mutex> lock(mMutex);
for (RenderOutputFrame& frame : mReadyFrames)
ReleaseFrame(frame);
mReadyFrames.clear(); mReadyFrames.clear();
} }
@@ -68,3 +72,10 @@ std::size_t RenderOutputQueue::CapacityLocked() const
{ {
return static_cast<std::size_t>(mPolicy.maxReadyFrames); return static_cast<std::size_t>(mPolicy.maxReadyFrames);
} }
// Invokes the frame's release callback, if one is set, passing the wrapped output frame, and
// then clears the callback so it cannot be invoked a second time for the same queue entry.
void RenderOutputQueue::ReleaseFrame(RenderOutputFrame& frame)
{
    if (!frame.releaseFrame)
        return;

    frame.releaseFrame(frame.frame);
    frame.releaseFrame = nullptr;
}

View File

@@ -5,6 +5,7 @@
#include <cstdint> #include <cstdint>
#include <deque> #include <deque>
#include <functional>
#include <mutex> #include <mutex>
struct RenderOutputFrame struct RenderOutputFrame
@@ -12,6 +13,7 @@ struct RenderOutputFrame
VideoIOOutputFrame frame; VideoIOOutputFrame frame;
uint64_t frameIndex = 0; uint64_t frameIndex = 0;
bool stale = false; bool stale = false;
std::function<void(VideoIOOutputFrame& frame)> releaseFrame;
}; };
struct RenderOutputQueueMetrics struct RenderOutputQueueMetrics
@@ -37,6 +39,7 @@ public:
private: private:
std::size_t CapacityLocked() const; std::size_t CapacityLocked() const;
static void ReleaseFrame(RenderOutputFrame& frame);
mutable std::mutex mMutex; mutable std::mutex mMutex;
VideoPlayoutPolicy mPolicy; VideoPlayoutPolicy mPolicy;

View File

@@ -0,0 +1,253 @@
#include "SystemOutputFramePool.h"
#include <algorithm>
namespace
{
// Returns a copy of the config in which a zero rowBytes has been replaced by the value
// VideoIORowBytes reports for the configured pixel format and width. A non-zero rowBytes is
// kept as supplied.
SystemOutputFramePoolConfig NormalizeConfig(SystemOutputFramePoolConfig config)
{
    const bool rowBytesUnset = (config.rowBytes == 0);
    if (rowBytesUnset)
    {
        config.rowBytes = VideoIORowBytes(config.pixelFormat, config.width);
    }
    return config;
}
}
// Constructs a pool and immediately configures it with the given settings, which allocates
// the slot storage.
SystemOutputFramePool::SystemOutputFramePool(const SystemOutputFramePoolConfig& config)
{
    this->Configure(config);
}
void SystemOutputFramePool::Configure(const SystemOutputFramePoolConfig& config)
{
std::lock_guard<std::mutex> lock(mMutex);
mConfig = NormalizeConfig(config);
mReadySlots.clear();
mSlots.clear();
mSlots.resize(mConfig.capacity);
const std::size_t byteCount = FrameByteCount();
for (StoredSlot& slot : mSlots)
{
slot.bytes.resize(byteCount);
slot.state = OutputFrameSlotState::Free;
++slot.generation;
}
mAcquireMissCount = 0;
mReadyUnderrunCount = 0;
}
// Returns a copy of the current (normalized) configuration, taken under the pool mutex.
SystemOutputFramePoolConfig SystemOutputFramePool::Config() const
{
    std::lock_guard<std::mutex> guard(mMutex);
    SystemOutputFramePoolConfig snapshot = mConfig;
    return snapshot;
}
// Claims the lowest-indexed Free slot for writing.
//
// On success the slot becomes Acquired, its generation is incremented, `slot` is filled with
// the slot's buffer description and handle, and true is returned. When no slot is Free, `slot`
// is reset to a default-constructed value, the acquire-miss counter is incremented, and false
// is returned.
bool SystemOutputFramePool::AcquireFreeSlot(OutputFrameSlot& slot)
{
    std::lock_guard<std::mutex> guard(mMutex);

    const auto freeSlot = std::find_if(mSlots.begin(), mSlots.end(), [](const StoredSlot& candidate) {
        return candidate.state == OutputFrameSlotState::Free;
    });

    if (freeSlot == mSlots.end())
    {
        slot = OutputFrameSlot();
        ++mAcquireMissCount;
        return false;
    }

    freeSlot->state = OutputFrameSlotState::Acquired;
    ++freeSlot->generation;
    FillOutputSlotLocked(static_cast<std::size_t>(freeSlot - mSlots.begin()), slot);
    return true;
}
// Moves an Acquired slot to Ready and appends its index to the ready queue. Returns false,
// changing nothing, when the handle is stale/out of range or the slot is not Acquired.
bool SystemOutputFramePool::PublishReadySlot(const OutputFrameSlot& slot)
{
    std::lock_guard<std::mutex> guard(mMutex);

    const bool published =
        TransitionSlotLocked(slot, OutputFrameSlotState::Acquired, OutputFrameSlotState::Ready);
    if (published)
        mReadySlots.push_back(slot.index);
    return published;
}
// Takes the oldest Ready slot from the ready queue.
//
// Queue entries whose index is out of range or whose slot is no longer Ready are dropped and
// skipped. On success the slot becomes Consumed, `slot` is filled, and true is returned. When
// the queue runs dry, `slot` is reset to a default-constructed value, the ready-underrun
// counter is incremented, and false is returned.
bool SystemOutputFramePool::ConsumeReadySlot(OutputFrameSlot& slot)
{
    std::lock_guard<std::mutex> guard(mMutex);

    while (!mReadySlots.empty())
    {
        const std::size_t candidate = mReadySlots.front();
        mReadySlots.pop_front();

        const bool stillReady =
            candidate < mSlots.size() && mSlots[candidate].state == OutputFrameSlotState::Ready;
        if (stillReady)
        {
            mSlots[candidate].state = OutputFrameSlotState::Consumed;
            FillOutputSlotLocked(candidate, slot);
            return true;
        }
    }

    slot = OutputFrameSlot();
    ++mReadyUnderrunCount;
    return false;
}
bool SystemOutputFramePool::MarkScheduled(const OutputFrameSlot& slot)
{
std::lock_guard<std::mutex> lock(mMutex);
if (!IsValidSlotLocked(slot))
return false;
if (mSlots[slot.index].state != OutputFrameSlotState::Ready &&
mSlots[slot.index].state != OutputFrameSlotState::Consumed)
{
return false;
}
RemoveReadyIndexLocked(slot.index);
mSlots[slot.index].state = OutputFrameSlotState::Scheduled;
return true;
}
bool SystemOutputFramePool::MarkScheduledByBuffer(void* bytes)
{
if (bytes == nullptr)
return false;
std::lock_guard<std::mutex> lock(mMutex);
for (std::size_t index = 0; index < mSlots.size(); ++index)
{
if (mSlots[index].bytes.empty() || mSlots[index].bytes.data() != bytes)
continue;
if (mSlots[index].state != OutputFrameSlotState::Ready &&
mSlots[index].state != OutputFrameSlotState::Consumed)
{
return false;
}
RemoveReadyIndexLocked(index);
mSlots[index].state = OutputFrameSlotState::Scheduled;
return true;
}
return false;
}
// Returns a slot to Free from any non-Free state and removes it from the ready queue. Returns
// false when the handle is stale/out of range or the slot is already Free.
bool SystemOutputFramePool::ReleaseSlot(const OutputFrameSlot& slot)
{
    std::lock_guard<std::mutex> guard(mMutex);

    const bool releasable =
        IsValidSlotLocked(slot) && mSlots[slot.index].state != OutputFrameSlotState::Free;
    if (!releasable)
        return false;

    return ReleaseSlotByIndexLocked(slot.index);
}
// Frees a slot only if it is currently Scheduled. Returns false, changing nothing, when the
// handle is stale/out of range or the slot is in any other state.
bool SystemOutputFramePool::ReleaseScheduledSlot(const OutputFrameSlot& slot)
{
    std::lock_guard<std::mutex> guard(mMutex);
    const bool released =
        TransitionSlotLocked(slot, OutputFrameSlotState::Scheduled, OutputFrameSlotState::Free);
    return released;
}
bool SystemOutputFramePool::ReleaseSlotByBuffer(void* bytes)
{
if (bytes == nullptr)
return false;
std::lock_guard<std::mutex> lock(mMutex);
for (std::size_t index = 0; index < mSlots.size(); ++index)
{
if (!mSlots[index].bytes.empty() && mSlots[index].bytes.data() == bytes)
return ReleaseSlotByIndexLocked(index);
}
return false;
}
void SystemOutputFramePool::Clear()
{
std::lock_guard<std::mutex> lock(mMutex);
mReadySlots.clear();
for (StoredSlot& slot : mSlots)
{
slot.state = OutputFrameSlotState::Free;
++slot.generation;
}
}
// Produces a snapshot of pool occupancy and counters under the pool mutex.
//
// capacity is the number of slots; readyCount is the length of the ready queue; the free,
// acquired, consumed and scheduled counts are tallied from each slot's current state.
SystemOutputFramePoolMetrics SystemOutputFramePool::GetMetrics() const
{
    std::lock_guard<std::mutex> guard(mMutex);

    SystemOutputFramePoolMetrics snapshot;
    for (const StoredSlot& stored : mSlots)
    {
        const OutputFrameSlotState state = stored.state;
        if (state == OutputFrameSlotState::Free)
            ++snapshot.freeCount;
        else if (state == OutputFrameSlotState::Acquired)
            ++snapshot.acquiredCount;
        else if (state == OutputFrameSlotState::Consumed)
            ++snapshot.consumedCount;
        else if (state == OutputFrameSlotState::Scheduled)
            ++snapshot.scheduledCount;
        // Ready slots are not tallied per slot; readyCount is taken from the ready queue below.
    }

    snapshot.capacity = mSlots.size();
    snapshot.readyCount = mReadySlots.size();
    snapshot.acquireMissCount = mAcquireMissCount;
    snapshot.readyUnderrunCount = mReadyUnderrunCount;
    return snapshot;
}
// A handle is valid when its index addresses an existing slot and its generation equals that
// slot's current generation. Reads mSlots without locking; callers in this file hold mMutex.
bool SystemOutputFramePool::IsValidSlotLocked(const OutputFrameSlot& slot) const
{
    if (slot.index >= mSlots.size())
        return false;

    return mSlots[slot.index].generation == slot.generation;
}
// Moves the slot to `nextState` only if the handle is valid and the slot is currently in
// `expectedState`. Returns whether the transition happened. Does not touch the ready queue.
bool SystemOutputFramePool::TransitionSlotLocked(const OutputFrameSlot& slot, OutputFrameSlotState expectedState, OutputFrameSlotState nextState)
{
    const bool canTransition =
        IsValidSlotLocked(slot) && mSlots[slot.index].state == expectedState;
    if (canTransition)
        mSlots[slot.index].state = nextState;
    return canTransition;
}
void SystemOutputFramePool::FillOutputSlotLocked(std::size_t index, OutputFrameSlot& slot)
{
StoredSlot& storedSlot = mSlots[index];
slot.index = index;
slot.generation = storedSlot.generation;
slot.frame.bytes = storedSlot.bytes.empty() ? nullptr : storedSlot.bytes.data();
slot.frame.rowBytes = static_cast<long>(mConfig.rowBytes);
slot.frame.width = mConfig.width;
slot.frame.height = mConfig.height;
slot.frame.pixelFormat = mConfig.pixelFormat;
slot.frame.nativeFrame = nullptr;
slot.frame.nativeBuffer = slot.frame.bytes;
}
// Removes every occurrence of `index` from the ready queue, preserving the order of the
// remaining entries. A no-op when the index is not queued.
void SystemOutputFramePool::RemoveReadyIndexLocked(std::size_t index)
{
    const auto firstRemoved = std::remove(mReadySlots.begin(), mReadySlots.end(), index);
    mReadySlots.erase(firstRemoved, mReadySlots.end());
}
bool SystemOutputFramePool::ReleaseSlotByIndexLocked(std::size_t index)
{
if (index >= mSlots.size() || mSlots[index].state == OutputFrameSlotState::Free)
return false;
RemoveReadyIndexLocked(index);
mSlots[index].state = OutputFrameSlotState::Free;
return true;
}
// Size in bytes of one frame buffer under the current configuration: rowBytes * height,
// computed in std::size_t. Reads mConfig without locking.
std::size_t SystemOutputFramePool::FrameByteCount() const
{
    const std::size_t bytesPerRow = static_cast<std::size_t>(mConfig.rowBytes);
    const std::size_t rowCount = static_cast<std::size_t>(mConfig.height);
    return bytesPerRow * rowCount;
}

View File

@@ -0,0 +1,90 @@
#pragma once
#include "VideoIOTypes.h"
#include <cstddef>
#include <cstdint>
#include <deque>
#include <mutex>
#include <vector>
// Lifecycle state of one slot in SystemOutputFramePool.
enum class OutputFrameSlotState
{
// Not in use; the only state AcquireFreeSlot will claim.
Free,
// Claimed by AcquireFreeSlot and not yet published.
Acquired,
// Published via PublishReadySlot and waiting in the ready queue.
Ready,
// Taken from the ready queue by ConsumeReadySlot.
Consumed,
// Marked by MarkScheduled / MarkScheduledByBuffer; freed again by a Release* call.
Scheduled
};
// Settings used by SystemOutputFramePool::Configure to size and describe its frame buffers.
struct SystemOutputFramePoolConfig
{
// Frame width reported on every slot's frame description.
unsigned width = 0;
// Frame height; each slot buffer holds rowBytes * height bytes.
unsigned height = 0;
// Pixel format reported on every slot's frame description.
VideoIOPixelFormat pixelFormat = VideoIOPixelFormat::Bgra8;
// Bytes per row. Zero means "derive it": the pool substitutes
// VideoIORowBytes(pixelFormat, width) when configuring.
unsigned rowBytes = 0;
// Number of slots the pool allocates.
std::size_t capacity = 0;
};
// Handle to a pool slot, filled in by SystemOutputFramePool when a slot is acquired or consumed.
struct OutputFrameSlot
{
// Description of the slot's buffer (pointer, row bytes, size, pixel format).
VideoIOOutputFrame frame;
// Position of the slot inside the pool.
std::size_t index = 0;
// Slot generation at the time the handle was filled; the pool rejects handles whose
// generation no longer matches the slot's current generation.
uint64_t generation = 0;
};
// Snapshot returned by SystemOutputFramePool::GetMetrics.
struct SystemOutputFramePoolMetrics
{
// Total number of slots in the pool.
std::size_t capacity = 0;
// Slots currently in the Free state.
std::size_t freeCount = 0;
// Slots currently in the Acquired state.
std::size_t acquiredCount = 0;
// Number of entries in the pool's ready queue.
std::size_t readyCount = 0;
// Slots currently in the Consumed state.
std::size_t consumedCount = 0;
// Slots currently in the Scheduled state.
std::size_t scheduledCount = 0;
// Times AcquireFreeSlot found no Free slot since the pool was last configured.
uint64_t acquireMissCount = 0;
// Times ConsumeReadySlot found no Ready slot since the pool was last configured.
uint64_t readyUnderrunCount = 0;
};
// Fixed-capacity pool of system-memory output frame buffers with a per-slot state machine
// (see OutputFrameSlotState) and a FIFO queue of Ready slots. Public member functions that
// touch pool state take mMutex internally.
class SystemOutputFramePool
{
public:
// Creates an empty pool with no slots; call Configure before use.
SystemOutputFramePool() = default;
// Creates a pool and configures it immediately.
explicit SystemOutputFramePool(const SystemOutputFramePoolConfig& config);
// Discards all slots and rebuilds the pool for `config`; also resets the miss/underrun counters.
void Configure(const SystemOutputFramePoolConfig& config);
// Returns a copy of the current configuration (rowBytes already derived if it was zero).
SystemOutputFramePoolConfig Config() const;
// Free -> Acquired. Fills `slot` on success; on failure resets `slot` and counts an acquire miss.
bool AcquireFreeSlot(OutputFrameSlot& slot);
// Acquired -> Ready, and appends the slot to the ready queue.
bool PublishReadySlot(const OutputFrameSlot& slot);
// Oldest Ready -> Consumed. Fills `slot` on success; on failure resets `slot` and counts an underrun.
bool ConsumeReadySlot(OutputFrameSlot& slot);
// Ready or Consumed -> Scheduled, identified by handle.
bool MarkScheduled(const OutputFrameSlot& slot);
// Ready or Consumed -> Scheduled, identified by the slot's buffer address.
bool MarkScheduledByBuffer(void* bytes);
// Any non-Free state -> Free, identified by handle.
bool ReleaseSlot(const OutputFrameSlot& slot);
// Scheduled -> Free only; fails for any other state.
bool ReleaseScheduledSlot(const OutputFrameSlot& slot);
// Any non-Free state -> Free, identified by the slot's buffer address.
bool ReleaseSlotByBuffer(void* bytes);
// Frees every slot, empties the ready queue, and bumps each slot's generation.
void Clear();
// Returns a snapshot of occupancy counts and counters.
SystemOutputFramePoolMetrics GetMetrics() const;
private:
// Backing storage and bookkeeping for one slot.
struct StoredSlot
{
// Pixel storage for the slot, sized by Configure.
std::vector<unsigned char> bytes;
OutputFrameSlotState state = OutputFrameSlotState::Free;
// Incremented by Configure, AcquireFreeSlot and Clear; compared against handle generations.
uint64_t generation = 1;
};
// The *Locked helpers below do not lock; callers in the implementation hold mMutex.
// True when the handle's index is in range and its generation matches the slot's.
bool IsValidSlotLocked(const OutputFrameSlot& slot) const;
// Sets the slot to `nextState` only if the handle is valid and the slot is in `expectedState`.
bool TransitionSlotLocked(const OutputFrameSlot& slot, OutputFrameSlotState expectedState, OutputFrameSlotState nextState);
// Writes the handle and frame description for the slot at `index` into `slot`.
void FillOutputSlotLocked(std::size_t index, OutputFrameSlot& slot);
// Removes every occurrence of `index` from the ready queue.
void RemoveReadyIndexLocked(std::size_t index);
// Frees the slot at `index` and removes it from the ready queue; false if out of range or already Free.
bool ReleaseSlotByIndexLocked(std::size_t index);
// rowBytes * height for the current configuration.
std::size_t FrameByteCount() const;
// Guards all members below; mutable so const accessors can lock it.
mutable std::mutex mMutex;
SystemOutputFramePoolConfig mConfig;
std::vector<StoredSlot> mSlots;
// Indices of Ready slots, oldest first.
std::deque<std::size_t> mReadySlots;
uint64_t mAcquireMissCount = 0;
uint64_t mReadyUnderrunCount = 0;
};

View File

@@ -34,6 +34,7 @@ void VideoBackend::ReleaseResources()
mReadyOutputQueue.Clear(); mReadyOutputQueue.Clear();
if (mVideoIODevice) if (mVideoIODevice)
mVideoIODevice->ReleaseResources(); mVideoIODevice->ReleaseResources();
mSystemOutputFramePool.Clear();
if (!VideoBackendLifecycle::CanTransition(mLifecycle.State(), VideoBackendLifecycleState::Stopped)) if (!VideoBackendLifecycle::CanTransition(mLifecycle.State(), VideoBackendLifecycleState::Stopped))
ApplyLifecycleFailure("Video backend resources released before lifecycle completed."); ApplyLifecycleFailure("Video backend resources released before lifecycle completed.");
ApplyLifecycleTransition(VideoBackendLifecycleState::Stopped, "Video backend resources released."); ApplyLifecycleTransition(VideoBackendLifecycleState::Stopped, "Video backend resources released.");
@@ -95,6 +96,14 @@ bool VideoBackend::ConfigureOutput(const VideoFormat& outputVideoMode, bool exte
ApplyLifecycleFailure(error.empty() ? "Video backend output configuration failed." : error); ApplyLifecycleFailure(error.empty() ? "Video backend output configuration failed." : error);
return false; return false;
} }
SystemOutputFramePoolConfig poolConfig;
poolConfig.width = mVideoIODevice->OutputFrameWidth();
poolConfig.height = mVideoIODevice->OutputFrameHeight();
poolConfig.pixelFormat = mVideoIODevice->OutputPixelFormat();
poolConfig.rowBytes = mVideoIODevice->OutputFrameRowBytes();
poolConfig.capacity = mPlayoutPolicy.outputFramePoolSize;
mSystemOutputFramePool.Configure(poolConfig);
RecordSystemMemoryPlayoutStats();
return ApplyLifecycleTransition(VideoBackendLifecycleState::Configured, "Video backend configured."); return ApplyLifecycleTransition(VideoBackendLifecycleState::Configured, "Video backend configured.");
} }
@@ -460,6 +469,8 @@ std::chrono::milliseconds VideoBackend::OutputProducerWakeInterval() const
void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& completion) void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& completion)
{ {
if (completion.outputFrameBuffer != nullptr)
mSystemOutputFramePool.ReleaseSlotByBuffer(completion.outputFrameBuffer);
RecordFramePacing(completion.result); RecordFramePacing(completion.result);
PublishOutputFrameCompleted(completion); PublishOutputFrameCompleted(completion);
const RenderOutputQueueMetrics initialQueueMetrics = mReadyOutputQueue.GetMetrics(); const RenderOutputQueueMetrics initialQueueMetrics = mReadyOutputQueue.GetMetrics();
@@ -483,6 +494,7 @@ void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& complet
} }
NotifyOutputProducer(); NotifyOutputProducer();
RecordBackendPlayoutHealth(completion.result, recoveryDecision); RecordBackendPlayoutHealth(completion.result, recoveryDecision);
RecordSystemMemoryPlayoutStats();
} }
void VideoBackend::RecordBackendPlayoutHealth(VideoIOCompletionResult result, const VideoPlayoutRecoveryDecision& recoveryDecision) void VideoBackend::RecordBackendPlayoutHealth(VideoIOCompletionResult result, const VideoPlayoutRecoveryDecision& recoveryDecision)
@@ -582,10 +594,12 @@ OutputProductionPressure VideoBackend::BuildOutputProductionPressure(const Rende
bool VideoBackend::RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion) bool VideoBackend::RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion)
{ {
const auto renderStart = std::chrono::steady_clock::now(); const auto renderStart = std::chrono::steady_clock::now();
OutputFrameSlot outputSlot;
VideoIOOutputFrame outputFrame; VideoIOOutputFrame outputFrame;
const auto acquireStart = std::chrono::steady_clock::now(); const auto acquireStart = std::chrono::steady_clock::now();
if (!BeginOutputFrame(outputFrame)) if (!mSystemOutputFramePool.AcquireFreeSlot(outputSlot))
return false; return false;
outputFrame = outputSlot.frame;
const auto acquireEnd = std::chrono::steady_clock::now(); const auto acquireEnd = std::chrono::steady_clock::now();
bool rendered = true; bool rendered = true;
@@ -595,7 +609,7 @@ bool VideoBackend::RenderReadyOutputFrame(const VideoIOState& state, const Video
const auto renderRequestEnd = std::chrono::steady_clock::now(); const auto renderRequestEnd = std::chrono::steady_clock::now();
const auto endAccessStart = std::chrono::steady_clock::now(); const auto endAccessStart = std::chrono::steady_clock::now();
EndOutputFrame(outputFrame); const bool publishedReady = mSystemOutputFramePool.PublishReadySlot(outputSlot);
const auto endAccessEnd = std::chrono::steady_clock::now(); const auto endAccessEnd = std::chrono::steady_clock::now();
const double acquireMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(acquireEnd - acquireStart).count(); const double acquireMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(acquireEnd - acquireStart).count();
const double renderRequestMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(renderRequestEnd - renderRequestStart).count(); const double renderRequestMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(renderRequestEnd - renderRequestStart).count();
@@ -603,15 +617,17 @@ bool VideoBackend::RenderReadyOutputFrame(const VideoIOState& state, const Video
if (!rendered) if (!rendered)
{ {
mSystemOutputFramePool.ReleaseSlot(outputSlot);
ApplyLifecycleTransition(VideoBackendLifecycleState::Degraded, "Output frame render request failed; skipping schedule for this frame."); ApplyLifecycleTransition(VideoBackendLifecycleState::Degraded, "Output frame render request failed; skipping schedule for this frame.");
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>( const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
std::chrono::steady_clock::now() - renderStart).count(); std::chrono::steady_clock::now() - renderStart).count();
RecordOutputRenderDuration(renderMilliseconds, acquireMilliseconds, renderRequestMilliseconds, endAccessMilliseconds); RecordOutputRenderDuration(renderMilliseconds, acquireMilliseconds, renderRequestMilliseconds, endAccessMilliseconds);
if (outputFrame.nativeFrame != nullptr) return false;
{ }
static_cast<IUnknown*>(outputFrame.nativeFrame)->Release();
outputFrame.nativeFrame = nullptr; if (!publishedReady)
} {
mSystemOutputFramePool.ReleaseSlot(outputSlot);
return false; return false;
} }
@@ -622,12 +638,13 @@ bool VideoBackend::RenderReadyOutputFrame(const VideoIOState& state, const Video
RenderOutputFrame readyFrame; RenderOutputFrame readyFrame;
readyFrame.frame = outputFrame; readyFrame.frame = outputFrame;
readyFrame.frameIndex = ++mNextReadyOutputFrameIndex; readyFrame.frameIndex = ++mNextReadyOutputFrameIndex;
readyFrame.releaseFrame = [this](VideoIOOutputFrame& frame) {
mSystemOutputFramePool.ReleaseSlotByBuffer(frame.bytes);
};
const bool pushed = mReadyOutputQueue.Push(readyFrame); const bool pushed = mReadyOutputQueue.Push(readyFrame);
if (!pushed && outputFrame.nativeFrame != nullptr) if (!pushed)
{ mSystemOutputFramePool.ReleaseSlot(outputSlot);
static_cast<IUnknown*>(outputFrame.nativeFrame)->Release(); RecordSystemMemoryPlayoutStats();
outputFrame.nativeFrame = nullptr;
}
return pushed; return pushed;
} }
@@ -638,10 +655,21 @@ bool VideoBackend::ScheduleReadyOutputFrame()
return false; return false;
RecordReadyQueueDepthSample(mReadyOutputQueue.GetMetrics()); RecordReadyQueueDepthSample(mReadyOutputQueue.GetMetrics());
if (!ScheduleOutputFrame(readyFrame.frame)) if (!mSystemOutputFramePool.MarkScheduledByBuffer(readyFrame.frame.bytes))
{
if (readyFrame.releaseFrame)
readyFrame.releaseFrame(readyFrame.frame);
return false; return false;
}
if (!ScheduleOutputFrame(readyFrame.frame))
{
mSystemOutputFramePool.ReleaseSlotByBuffer(readyFrame.frame.bytes);
return false;
}
PublishOutputFrameScheduled(readyFrame.frame); PublishOutputFrameScheduled(readyFrame.frame);
RecordSystemMemoryPlayoutStats();
return true; return true;
} }
@@ -721,6 +749,21 @@ void VideoBackend::RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& m
++mReadyQueueZeroDepthCount; ++mReadyQueueZeroDepthCount;
} }
void VideoBackend::RecordSystemMemoryPlayoutStats()
{
const SystemOutputFramePoolMetrics poolMetrics = mSystemOutputFramePool.GetMetrics();
const RenderOutputQueueMetrics queueMetrics = mReadyOutputQueue.GetMetrics();
mHealthTelemetry.TryRecordSystemMemoryPlayoutStats(
poolMetrics.freeCount,
poolMetrics.readyCount,
poolMetrics.scheduledCount,
poolMetrics.readyUnderrunCount,
0,
queueMetrics.droppedCount,
0.0,
0.0);
}
void VideoBackend::RecordOutputRenderDuration(double renderMilliseconds, double acquireMilliseconds, double renderRequestMilliseconds, double endAccessMilliseconds) void VideoBackend::RecordOutputRenderDuration(double renderMilliseconds, double acquireMilliseconds, double renderRequestMilliseconds, double endAccessMilliseconds)
{ {
std::lock_guard<std::mutex> lock(mOutputMetricsMutex); std::lock_guard<std::mutex> lock(mOutputMetricsMutex);

View File

@@ -2,6 +2,7 @@
#include "OutputProductionController.h" #include "OutputProductionController.h"
#include "RenderOutputQueue.h" #include "RenderOutputQueue.h"
#include "SystemOutputFramePool.h"
#include "VideoBackendLifecycle.h" #include "VideoBackendLifecycle.h"
#include "VideoIOTypes.h" #include "VideoIOTypes.h"
#include "VideoPlayoutPolicy.h" #include "VideoPlayoutPolicy.h"
@@ -84,6 +85,7 @@ private:
bool ScheduleBlackUnderrunFrame(); bool ScheduleBlackUnderrunFrame();
void RecordFramePacing(VideoIOCompletionResult completionResult); void RecordFramePacing(VideoIOCompletionResult completionResult);
void RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics); void RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics);
void RecordSystemMemoryPlayoutStats();
void RecordOutputRenderDuration(double renderMilliseconds, double acquireMilliseconds, double renderRequestMilliseconds, double endAccessMilliseconds); void RecordOutputRenderDuration(double renderMilliseconds, double acquireMilliseconds, double renderRequestMilliseconds, double endAccessMilliseconds);
bool ApplyLifecycleTransition(VideoBackendLifecycleState state, const std::string& message); bool ApplyLifecycleTransition(VideoBackendLifecycleState state, const std::string& message);
bool ApplyLifecycleFailure(const std::string& message); bool ApplyLifecycleFailure(const std::string& message);
@@ -102,6 +104,7 @@ private:
VideoPlayoutPolicy mPlayoutPolicy; VideoPlayoutPolicy mPlayoutPolicy;
OutputProductionController mOutputProductionController; OutputProductionController mOutputProductionController;
RenderOutputQueue mReadyOutputQueue; RenderOutputQueue mReadyOutputQueue;
SystemOutputFramePool mSystemOutputFramePool;
std::unique_ptr<VideoIODevice> mVideoIODevice; std::unique_ptr<VideoIODevice> mVideoIODevice;
std::unique_ptr<OpenGLVideoIOBridge> mBridge; std::unique_ptr<OpenGLVideoIOBridge> mBridge;
std::mutex mOutputCompletionMutex; std::mutex mOutputCompletionMutex;

View File

@@ -76,6 +76,7 @@ struct VideoIOOutputFrame
struct VideoIOCompletion struct VideoIOCompletion
{ {
VideoIOCompletionResult result = VideoIOCompletionResult::Completed; VideoIOCompletionResult result = VideoIOCompletionResult::Completed;
void* outputFrameBuffer = nullptr;
}; };
struct VideoIOScheduleTime struct VideoIOScheduleTime

View File

@@ -1,6 +1,7 @@
#include "DeckLinkSession.h" #include "DeckLinkSession.h"
#include <atlbase.h> #include <atlbase.h>
#include <atomic>
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#include <new> #include <new>
@@ -10,6 +11,75 @@
namespace namespace
{ {
// IDeckLinkVideoBuffer implementation that exposes a caller-supplied block of
// memory. The wrapper only stores the pointer and size it was constructed with:
// it never allocates, copies, or frees the underlying bytes. Its own lifetime
// follows COM reference counting - the count starts at 1 and the object deletes
// itself when Release() brings the count to 0.
class SystemMemoryDeckLinkVideoBuffer : public IDeckLinkVideoBuffer
{
public:
    // bytes:     start of the memory reported by GetBytes(); may be null, in which
    //            case GetBytes() reports E_FAIL.
    // sizeBytes: value reported by GetSize().
    SystemMemoryDeckLinkVideoBuffer(void* bytes, unsigned long long sizeBytes) :
        mBytes(bytes),
        mSizeBytes(sizeBytes),
        mRefCount(1)
    {
    }

    // Supports IUnknown and IDeckLinkVideoBuffer only. On success the returned
    // pointer carries an extra reference; on an unsupported iid *ppv is nulled.
    HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, LPVOID* ppv) override
    {
        if (ppv == nullptr)
            return E_POINTER;

        const bool supported = (iid == IID_IUnknown) || (iid == IID_IDeckLinkVideoBuffer);
        if (!supported)
        {
            *ppv = nullptr;
            return E_NOINTERFACE;
        }

        *ppv = static_cast<IDeckLinkVideoBuffer*>(this);
        AddRef();
        return S_OK;
    }

    // Increments the reference count and returns the new value.
    ULONG STDMETHODCALLTYPE AddRef() override
    {
        return mRefCount.fetch_add(1) + 1;
    }

    // Decrements the reference count and returns the new value; destroys the
    // wrapper (not the wrapped bytes) when the count reaches zero.
    ULONG STDMETHODCALLTYPE Release() override
    {
        const ULONG remaining = mRefCount.fetch_sub(1) - 1;
        if (remaining == 0)
            delete this;
        return remaining;
    }

    // Writes the wrapped pointer to *buffer. Returns E_FAIL when that pointer is
    // null (the null is still written to *buffer).
    HRESULT STDMETHODCALLTYPE GetBytes(void** buffer) override
    {
        if (buffer == nullptr)
            return E_POINTER;

        *buffer = mBytes;
        if (mBytes == nullptr)
            return E_FAIL;
        return S_OK;
    }

    // Writes the size supplied at construction to *bufferSize.
    HRESULT STDMETHODCALLTYPE GetSize(unsigned long long* bufferSize) override
    {
        if (bufferSize == nullptr)
            return E_POINTER;

        *bufferSize = mSizeBytes;
        return S_OK;
    }

    // Access bracketing does no work in this wrapper; both calls always succeed.
    HRESULT STDMETHODCALLTYPE StartAccess(BMDBufferAccessFlags) override
    {
        return S_OK;
    }

    HRESULT STDMETHODCALLTYPE EndAccess(BMDBufferAccessFlags) override
    {
        return S_OK;
    }

private:
    void* mBytes = nullptr;                 // wrapped memory; never freed by this class
    unsigned long long mSizeBytes = 0;      // size reported through GetSize()
    std::atomic<ULONG> mRefCount;           // COM reference count, set to 1 by the constructor
};
std::string BstrToUtf8(BSTR value) std::string BstrToUtf8(BSTR value)
{ {
if (value == nullptr) if (value == nullptr)
@@ -460,6 +530,48 @@ bool DeckLinkSession::ScheduleFrame(IDeckLinkMutableVideoFrame* outputVideoFrame
output->ScheduleVideoFrame(outputVideoFrame, scheduleTime.streamTime, scheduleTime.duration, scheduleTime.timeScale) == S_OK; output->ScheduleVideoFrame(outputVideoFrame, scheduleTime.streamTime, scheduleTime.duration, scheduleTime.timeScale) == S_OK;
} }
bool DeckLinkSession::ScheduleSystemMemoryFrame(const VideoIOOutputFrame& frame)
{
if (output == nullptr || frame.bytes == nullptr || frame.rowBytes <= 0 || frame.height == 0)
return false;
CComPtr<IDeckLinkVideoBuffer> videoBuffer;
videoBuffer.Attach(new (std::nothrow) SystemMemoryDeckLinkVideoBuffer(
frame.bytes,
static_cast<unsigned long long>(frame.rowBytes) * static_cast<unsigned long long>(frame.height)));
if (videoBuffer == nullptr)
return false;
CComPtr<IDeckLinkMutableVideoFrame> outputVideoFrame;
const BMDPixelFormat pixelFormat = DeckLinkPixelFormatForVideoIO(frame.pixelFormat);
if (output->CreateVideoFrameWithBuffer(
frame.width,
frame.height,
frame.rowBytes,
pixelFormat,
bmdFrameFlagFlipVertical,
videoBuffer,
&outputVideoFrame) != S_OK)
{
return false;
}
IDeckLinkVideoFrame* scheduledFrame = outputVideoFrame;
{
std::lock_guard<std::mutex> lock(mScheduledSystemFrameMutex);
mScheduledSystemFrameBuffers[scheduledFrame] = frame.bytes;
}
if (ScheduleFrame(outputVideoFrame))
return true;
{
std::lock_guard<std::mutex> lock(mScheduledSystemFrameMutex);
mScheduledSystemFrameBuffers.erase(scheduledFrame);
}
return false;
}
bool DeckLinkSession::ScheduleBlackFrame(IDeckLinkMutableVideoFrame* outputVideoFrame) bool DeckLinkSession::ScheduleBlackFrame(IDeckLinkMutableVideoFrame* outputVideoFrame)
{ {
if (outputVideoFrame == nullptr) if (outputVideoFrame == nullptr)
@@ -505,6 +617,9 @@ VideoPlayoutRecoveryDecision DeckLinkSession::AccountForCompletionResult(VideoIO
bool DeckLinkSession::ScheduleOutputFrame(const VideoIOOutputFrame& frame) bool DeckLinkSession::ScheduleOutputFrame(const VideoIOOutputFrame& frame)
{ {
if (frame.nativeFrame == nullptr)
return ScheduleSystemMemoryFrame(frame);
IDeckLinkMutableVideoFrame* outputVideoFrame = static_cast<IDeckLinkMutableVideoFrame*>(frame.nativeFrame); IDeckLinkMutableVideoFrame* outputVideoFrame = static_cast<IDeckLinkMutableVideoFrame*>(frame.nativeFrame);
const bool scheduled = ScheduleFrame(outputVideoFrame); const bool scheduled = ScheduleFrame(outputVideoFrame);
if (outputVideoFrame != nullptr) if (outputVideoFrame != nullptr)
@@ -621,13 +736,29 @@ void DeckLinkSession::HandleVideoInputFrame(IDeckLinkVideoInputFrame* inputFrame
void DeckLinkSession::HandlePlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, BMDOutputFrameCompletionResult completionResult) void DeckLinkSession::HandlePlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, BMDOutputFrameCompletionResult completionResult)
{ {
void* completedSystemBuffer = nullptr;
if (completedFrame != nullptr) if (completedFrame != nullptr)
{ {
CComPtr<IDeckLinkMutableVideoFrame> reusableFrame; bool externalSystemFrame = false;
if (completedFrame->QueryInterface(IID_IDeckLinkMutableVideoFrame, reinterpret_cast<void**>(&reusableFrame)) == S_OK &&
reusableFrame != nullptr)
{ {
outputVideoFrameQueue.push_back(reusableFrame); std::lock_guard<std::mutex> lock(mScheduledSystemFrameMutex);
auto externalFrame = mScheduledSystemFrameBuffers.find(completedFrame);
if (externalFrame != mScheduledSystemFrameBuffers.end())
{
completedSystemBuffer = externalFrame->second;
mScheduledSystemFrameBuffers.erase(externalFrame);
externalSystemFrame = true;
}
}
if (!externalSystemFrame)
{
CComPtr<IDeckLinkMutableVideoFrame> reusableFrame;
if (completedFrame->QueryInterface(IID_IDeckLinkMutableVideoFrame, reinterpret_cast<void**>(&reusableFrame)) == S_OK &&
reusableFrame != nullptr)
{
outputVideoFrameQueue.push_back(reusableFrame);
}
} }
} }
@@ -636,6 +767,7 @@ void DeckLinkSession::HandlePlayoutFrameCompleted(IDeckLinkVideoFrame* completed
VideoIOCompletion completion; VideoIOCompletion completion;
completion.result = TranslateCompletionResult(completionResult); completion.result = TranslateCompletionResult(completionResult);
completion.outputFrameBuffer = completedSystemBuffer;
mOutputFrameCallback(completion); mOutputFrameCallback(completion);
} }

View File

@@ -11,7 +11,9 @@
#include <atlbase.h> #include <atlbase.h>
#include <deque> #include <deque>
#include <mutex>
#include <string> #include <string>
#include <unordered_map>
class OpenGLComposite; class OpenGLComposite;
@@ -70,6 +72,7 @@ private:
bool AcquireNextOutputVideoFrame(CComPtr<IDeckLinkMutableVideoFrame>& outputVideoFrame); bool AcquireNextOutputVideoFrame(CComPtr<IDeckLinkMutableVideoFrame>& outputVideoFrame);
bool PopulateOutputFrame(IDeckLinkMutableVideoFrame* outputVideoFrame, VideoIOOutputFrame& frame); bool PopulateOutputFrame(IDeckLinkMutableVideoFrame* outputVideoFrame, VideoIOOutputFrame& frame);
bool ScheduleFrame(IDeckLinkMutableVideoFrame* outputVideoFrame); bool ScheduleFrame(IDeckLinkMutableVideoFrame* outputVideoFrame);
bool ScheduleSystemMemoryFrame(const VideoIOOutputFrame& frame);
bool ScheduleBlackFrame(IDeckLinkMutableVideoFrame* outputVideoFrame); bool ScheduleBlackFrame(IDeckLinkMutableVideoFrame* outputVideoFrame);
static VideoIOCompletionResult TranslateCompletionResult(BMDOutputFrameCompletionResult completionResult); static VideoIOCompletionResult TranslateCompletionResult(BMDOutputFrameCompletionResult completionResult);
@@ -79,6 +82,8 @@ private:
CComPtr<IDeckLinkOutput> output; CComPtr<IDeckLinkOutput> output;
CComPtr<IDeckLinkKeyer> keyer; CComPtr<IDeckLinkKeyer> keyer;
std::deque<CComPtr<IDeckLinkMutableVideoFrame>> outputVideoFrameQueue; std::deque<CComPtr<IDeckLinkMutableVideoFrame>> outputVideoFrameQueue;
std::mutex mScheduledSystemFrameMutex;
std::unordered_map<IDeckLinkVideoFrame*, void*> mScheduledSystemFrameBuffers;
VideoIOState mState; VideoIOState mState;
VideoPlayoutPolicy mPlayoutPolicy; VideoPlayoutPolicy mPlayoutPolicy;
VideoPlayoutScheduler mScheduler; VideoPlayoutScheduler mScheduler;

View File

@@ -163,3 +163,53 @@ Read:
Removing ongoing GPU readback recovers output timing immediately. The direct cause of the Phase 7.5 playback collapse is the per-frame GPU-to-CPU readback cost, not DeckLink frame acquisition, output frame end-access, PBO allocation, fence waiting, or CPU copy. Removing ongoing GPU readback recovers output timing immediately. The direct cause of the Phase 7.5 playback collapse is the per-frame GPU-to-CPU readback cost, not DeckLink frame acquisition, output frame end-access, PBO allocation, fence waiting, or CPU copy.
The internal ready queue depth still being low while DeckLink reports a healthy device buffer suggests the ready queue is acting as a short staging queue rather than the full device playout buffer. For the next fix, prioritize avoiding a blocking readback on every output frame instead of only increasing internal ready queue depth. The internal ready queue depth still being low while DeckLink reports a healthy device buffer suggests the ready queue is acting as a short staging queue rather than the full device playout buffer. For the next fix, prioritize avoiding a blocking readback on every output frame instead of only increasing internal ready queue depth.
## Experiment 4: BGRA8 pack framebuffer async readback
Status: sampled
Date: 2026-05-11
Change:
- The output path now packs/blits the final output into a BGRA8-compatible framebuffer before readback.
- Async readback reads from the pack framebuffer using `GL_BGRA` / `GL_UNSIGNED_INT_8_8_8_8_REV`.
- The deeper async PBO ring remains active.
Question:
Does making the GPU output/readback format match the DeckLink BGRA8 scheduling format reduce the driver-side `glReadPixels` stall?
User-visible result:
- Long pauses appear to be gone.
- Playback still stutters, but the stutters look limited to a few frames rather than multi-second freezes.
Telemetry summary:
- Throughput recovered to roughly real time in the sampled window.
- Over 5 seconds, the app pushed and popped 305 output frames.
- `asyncQueueReadPixelsMs` dropped from the earlier 8-14 ms range to roughly 0.05-0.13 ms in the representative samples.
- `renderMs` usually sat around 2-5 ms in the sampled burst.
- Late and dropped frame counts did not increase during the 5 second delta sample.
- The ready queue still repeatedly touched 0 and accumulated underruns, which matches the remaining short stutters.
Representative samples:
| readyDepth | renderMs | smoothedRenderMs | drawMs | mapMs | copyMs | asyncQueueReadPixelsMs | queueWaitMs |
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
| 0 | 4.855 | 9.494 | 0.570 | 0.234 | 0.822 | 0.128 | 0.026 |
| 0 | 1.957 | 9.041 | 0.468 | 0.139 | 0.604 | 0.048 | 0.016 |
| 0 | 3.366 | 5.879 | 0.513 | 1.166 | 0.692 | 0.129 | 0.022 |
| 0 | 5.208 | 6.492 | 2.209 | 1.358 | 0.714 | 0.090 | 0.061 |
| 0 | 2.957 | 8.852 | 0.537 | 1.041 | 0.547 | 0.087 | 0.040 |
Five-second delta:
| pushed | popped | ready underruns | zero-depth samples | late delta | dropped delta | scheduled lead |
| ---: | ---: | ---: | ---: | ---: | ---: | ---: |
| 305 | 305 | 129 | 671 | 0 | 0 | 20 |
Read:
The main readback stall appears to have been the previous format/path combination, not unavoidable BGRA8 bandwidth. The remaining problem now looks like cadence and buffering: the producer can average real-time throughput again, but the ready queue still runs empty often enough to create visible short stutters.

View File

@@ -0,0 +1,352 @@
# Phase 7.6: System-Memory Playout Buffer Design
## Status
In progress.
Implemented so far:
- BGRA8 `SystemOutputFramePool` with non-GL tests
- render/readback production now writes into app-owned system-memory slots
- DeckLink output scheduling can wrap system-memory slots with `CreateVideoFrameWithBuffer()`
- DeckLink completion callbacks release scheduled system-memory slots
- ready-queue discard paths release owned frames instead of leaking slots
- telemetry scaffolding exposes free, ready, and scheduled system-memory frame counts
- async PBO readback is now a deeper pipeline by default and ordinary misses no longer flush queued readbacks
Still to verify/tune on hardware:
- sustained DeckLink buffer depth
- frame age at schedule/completion
- repeat/underrun policy behavior under real stalls
- whether deeper async readback reduces sawtooth buffer drain
- whether BGRA8 bandwidth is sufficient before considering v210
Phase 7.5 isolated the current playout timing problem around output readback and DeckLink scheduling pressure. The fast-transfer path from the DeckLink OpenGL sample is not available on the current test GPU, so the next direction is to make the normal path behave more like broadcast playout systems: render ahead, read back into system-memory frame buffers, and let DeckLink consume already-complete frames.
This phase is not a move away from rendering every frame. It is a move away from making DeckLink wait for each frame to be rendered and read back at the moment it needs to be scheduled.
## SDK Finding: RGBA8 Is Not Required
DeckLink output frames do not have to be RGBA8/BGRA8.
The SDK accepts a `BMDPixelFormat` when creating output frames. The available formats include:
- `bmdFormat8BitYUV`
- `bmdFormat10BitYUV`
- `bmdFormat10BitYUVA`
- `bmdFormat8BitARGB`
- `bmdFormat8BitBGRA`
- `bmdFormat10BitRGB`
- `bmdFormat12BitRGB`
The SDK samples also use non-RGBA output paths:
- `FilePlayback` converts unsupported source frames to `bmdFormat10BitYUV`
- `PlaybackStills` uses `bmdFormat10BitYUV`
- `InputLoopThrough` handles `bmdFormat10BitYUV` and related formats
- `SignalGen` exposes 8-bit YUV, 10-bit YUV, 8-bit RGB, and 10-bit RGB choices
- `OpenGLOutput` uses BGRA for that sample path, but that is not a DeckLink API requirement
The current app already has partial support for this direction:
- `DeckLinkSession` probes support for `bmdFormat10BitYUV` and `bmdFormat10BitYUVA`
- `VideoIOPixelFormat::V210` maps to `bmdFormat10BitYUV`
- `VideoIOPixelFormat::Yuva10` maps to `bmdFormat10BitYUVA`
- `RenderEngine` has a 10-bit output packing path
- row-byte calculation already distinguishes `bmdFormat10BitYUV`, `bmdFormat10BitYUVA`, and BGRA-style formats
Packing before readback therefore remains available as a later optimization if bandwidth proves to be the remaining bottleneck. For the first Phase 7.6 implementation, keep BGRA8 as the active output format and focus on the larger architectural problem: DeckLink should schedule from completed system-memory frames instead of waiting on the current render/readback operation.
## Goal
Create a buffered system-memory playout path:
- render every output frame
- keep BGRA8 as the first output/readback format
- read back into reusable CPU/system-memory frame slots
- keep a small queue of completed frames ahead of DeckLink
- schedule DeckLink from completed frames rather than from in-progress rendering
- preserve telemetry so every experiment can be compared against Phase 7.5
## Non-Goals
- Do not reintroduce NVIDIA DVP or AMD pinned-memory as a required path.
- Do not hide dropped, repeated, or late frames.
- Do not make cached-output playback the default production behavior.
- Do not add a large latency buffer without making that latency explicit.
- Do not rewrite shader/effect evaluation unless profiling proves it is the bottleneck.
- Do not make v210/YUV packing part of the first implementation unless BGRA8 buffering is proven insufficient.
## Architecture
### Current Problem Shape
The current path is still too close to:
1. DeckLink needs a frame.
2. App renders or finalizes a frame.
3. App reads back from GL.
4. App schedules the frame.
That can work only if every step reliably fits inside the frame budget. When readback stalls or scheduling is delayed, DeckLink sees a shallow buffer and playback freezes.
### Target Shape
The target path is:
1. Render producer prepares future frames.
2. GPU output is read back as BGRA8 into the selected system frame slot.
3. Readback fills a free system-memory frame slot.
4. Completed slots enter a ready queue.
5. DeckLink scheduler consumes ready slots at output cadence.
6. Completion callbacks release slots back to the pool.
This gives the scheduler a small cushion without sacrificing rendered frames.
## Pixel Format Strategy
### First Target: BGRA8
Use BGRA8 as the first serious output target.
Reasons:
- it is the path closest to the current renderer
- it avoids introducing color-space packing risk while the buffering architecture is still being proven
- it keeps alpha/keying behavior easier to reason about
- it lets Phase 7.6 isolate scheduling/readback ownership from pixel-format conversion
Known byte cost at 1920x1080:
- BGRA8: about 8.29 MB per frame
That is larger than v210, but the immediate hypothesis is that the freezes come from scheduling coupling and readback stalls, not only raw byte count. Prove or disprove that with the system-memory queue first.
### Later Target: 10-bit YUV / v210
Keep v210 available as a later optimization.
Reasons to revisit it:
- it is a native DeckLink output format
- it can reduce 1920x1080 readback size from about 8.29 MB per BGRA8 frame to about 5.53 MB per v210 frame
- it may better match final video I/O expectations for fill-only output
Do this only after the BGRA8 system-memory queue is measured. If BGRA8 buffering keeps DeckLink healthy, v210 becomes a quality/bandwidth refinement rather than a rescue path.
### Alpha / Keying
For the first implementation, BGRA8 remains the default target.
For alpha/key workflows, `bmdFormat10BitYUVA` may be needed, or key/fill may need to remain split depending on the device mode and keyer configuration. Phase 7.6 should make this explicit rather than assuming one format fits both.
## Proposed Components
### `SystemOutputFramePool`
Owns reusable CPU-side frame slots.
Responsibilities:
- allocate a fixed number of output slots
- expose free slots to the render/readback producer
- expose completed slots to the DeckLink scheduler
- track slot generation, frame id, frame time, and pixel format
- prevent reuse while DeckLink still owns or may read the frame
### `OutputFrameSlot`
Represents one CPU/system-memory playout frame.
Likely contents:
- pointer to writable frame bytes
- row bytes
- width and height
- `BMDPixelFormat` or app-level equivalent
- frame number / stream time
- timing metadata
- completion state
- optional DeckLink frame wrapper
### `DeckLinkOutputFrameAdapter`
Bridges app-owned memory to DeckLink output frames.
Options to evaluate:
- create DeckLink frames with app-owned buffers where supported by the SDK
- keep DeckLink-created frames in the pool and write directly into their bytes
- wrap app memory behind a small `IDeckLinkVideoFrame` implementation only if needed
The simplest production path should avoid an extra CPU copy between app memory and DeckLink memory.
### `OutputFrameProducer`
Runs on or is driven by the render thread.
Responsibilities:
- acquire a free system frame slot
- render the next frame
- read back BGRA8 into the slot
- publish the slot to the ready queue
- record readback timings
### `DeckLinkPlayoutScheduler`
Consumes completed system frames.
Responsibilities:
- keep DeckLink scheduled ahead by the configured target depth
- schedule from the ready queue
- repeat/drop according to explicit policy when the queue is empty or too deep
- release frame slots after DeckLink completion callbacks
- report buffer depth and scheduling lead
## Migration Plan
### Step 1: Make Output Pixel Format Explicit Everywhere
Current format selection exists, but Phase 7.6 should make it impossible to confuse render texture format, readback format, and DeckLink scheduled format.
Deliverables:
- log selected DeckLink output pixel format at startup
- expose readback bytes per frame in telemetry
- expose whether the frame was BGRA, v210, or YUVA
- make BGRA8 the default and first supported system-buffer path
### Step 2: Introduce the BGRA8 System Frame Pool
Add a fixed-size pool of BGRA8 system-memory output slots.
Initial target depth:
- 3 ready/scheduled frames minimum
- 5 frames as the practical DeckLink-health target
- configurable for experiments
The pool should be testable without OpenGL or DeckLink hardware.
### Step 3: Read Back BGRA8 Into Pool Slots
Move readback output away from transient buffers and into acquired frame slots.
The producer must never block DeckLink scheduling while waiting for a free slot if a safe repeat/drop policy can keep playback alive.
### Step 4: Schedule From Completed Slots
Change DeckLink scheduling to consume completed system frames.
DeckLink callbacks should become the point where slots are returned to the pool.
This is the main behavioral change: scheduling no longer waits for the active render/readback operation.
### Step 5: Add Playout Policies
Make underflow and overflow behavior explicit.
Possible policies:
- repeat last completed frame on underflow
- schedule black on startup only
- drop oldest completed frame if the producer gets too far ahead
- preserve most recent frame for live-control responsiveness
The default should favor stable output cadence and visible telemetry over silently guessing which frame would have been correct.
### Step 6: Tune Buffer Depth and Latency
Measure:
- render time
- readback time
- CPU copy time, if any
- ready queue depth
- scheduled queue depth
- frame age at schedule time
- frame age at display callback
- repeats, drops, and underruns
Then choose a default buffer depth that keeps DeckLink healthy without adding unnecessary latency.
### Step 7: Optional v210 Experiment
Only after BGRA8 buffering has been measured, add a runtime option that forces:
- GPU pack to v210
- readback of packed v210 bytes
- DeckLink scheduling as `bmdFormat10BitYUV`
This should be compared against the completed BGRA8 system-memory path, not against the older coupled path.
## Telemetry
Keep the Phase 7.5 counters and add:
- `outputPixelFormat`
- `outputReadbackBytes`
- `outputPackMode`
- `systemFramePoolFree`
- `systemFramePoolReady`
- `systemFramePoolScheduled`
- `systemFrameAgeAtScheduleMs`
- `systemFrameAgeAtCompletionMs`
- `systemFrameUnderruns`
- `systemFrameRepeats`
- `systemFrameDrops`
- `deckLinkScheduleLeadFrames`
- `deckLinkScheduleLeadMs`
Telemetry scaffolding can land before the frame pool itself. Until `SystemOutputFramePool` populates a given field, that field should remain a producer-owned gauge/counter with a default zero value in `HealthTelemetry`; it should not be inferred from the existing render-ready queue or DeckLink pool, because those are adjacent concepts, not the final free/ready/scheduled system-memory slot model.
Existing counters that should remain useful:
- render frame time
- async queue time
- readback timing
- output queue depth
- displayed late count
- dropped count
- DeckLink buffered frame count
## Tests
Add non-GL tests for:
- frame pool acquire/publish/consume/release
- slots are not reused while scheduled
- underflow repeats the last completed frame when configured
- overflow drops according to policy
- row-byte and byte-size calculation for BGRA8 first, with v210 and YUVA covered when those modes are enabled
- scheduler consumes only completed frames
- completion callback releases the expected slot
Hardware/manual tests:
- BGRA8 system-buffered output works
- DeckLink buffer depth stays healthy
- no black-frame startup longer than configured preroll
- shutdown drains or releases scheduled slots safely
## Risks
- DeckLink frame ownership rules may force one extra copy if app-owned buffers are not accepted in the exact path we use.
- Buffering improves cadence but adds latency.
- If GPU readback itself remains slower than real time, buffering only delays the underflow.
- v210 remains a future optimization and may still carry color-space/keying risk when introduced.
## Exit Criteria
Phase 7.6 is complete when:
- DeckLink output format is explicit and logged
- BGRA8 system-memory output slots are the default playout path
- completed system-memory frames are queued ahead of DeckLink scheduling
- DeckLink callbacks release/recycle frame slots
- ready/scheduled buffer depth is visible in telemetry
- underflow/repeat/drop behavior is explicit and tested
- the app can sustain a healthy DeckLink buffer without using cached-output playback

View File

@@ -362,6 +362,9 @@ Expected observations:
Expected observations: Expected observations:
- current playout queue depth - current playout queue depth
- system-memory playout frame counts by state: free, ready, and scheduled
- system-memory playout underrun, repeat, and drop counters
- system-memory frame age at schedule and completion time
- input signal state - input signal state
- late frames - late frames
- dropped frames - dropped frames

View File

@@ -235,10 +235,15 @@ Examples:
Examples: Examples:
- output queue depth - output queue depth
- free system-memory playout frame count
- ready system-memory playout frame count
- scheduled system-memory playout frame count
- scheduled frame index - scheduled frame index
- completed frame index - completed frame index
- late frame count - late frame count
- dropped frame count - dropped frame count
- underrun/repeat/drop counters for system-memory playout policy
- frame age at schedule time and completion callback time
- spare buffer count - spare buffer count
- current headroom target - current headroom target

View File

@@ -200,6 +200,34 @@ void TestOutputRenderPipelineTiming()
Expect(playout.outputCachedFallbackCount == 1, "output render timing counts cached fallbacks"); Expect(playout.outputCachedFallbackCount == 1, "output render timing counts cached fallbacks");
Expect(playout.outputSyncFallbackCount == 1, "output render timing counts sync fallbacks"); Expect(playout.outputSyncFallbackCount == 1, "output render timing counts sync fallbacks");
} }
void TestSystemMemoryPlayoutStats()
{
HealthTelemetry telemetry;
telemetry.RecordSystemMemoryPlayoutStats(2, 3, 1, 4, 5, 6, 12.5, 24.0);
HealthTelemetry::BackendPlayoutSnapshot playout = telemetry.GetBackendPlayoutSnapshot();
Expect(playout.systemFramePoolFree == 2, "system-memory playout stores free frame count");
Expect(playout.systemFramePoolReady == 3, "system-memory playout stores ready frame count");
Expect(playout.systemFramePoolScheduled == 1, "system-memory playout stores scheduled frame count");
Expect(playout.systemFrameUnderrunCount == 4, "system-memory playout stores underrun count");
Expect(playout.systemFrameRepeatCount == 5, "system-memory playout stores repeat count");
Expect(playout.systemFrameDropCount == 6, "system-memory playout stores drop count");
Expect(playout.systemFrameAgeAtScheduleMilliseconds == 12.5, "system-memory playout stores schedule age");
Expect(playout.systemFrameAgeAtCompletionMilliseconds == 24.0, "system-memory playout stores completion age");
Expect(telemetry.TryRecordSystemMemoryPlayoutStats(1, 0, 2, 7, 8, 9, -1.0, -2.0),
"try system-memory playout stats succeeds when uncontended");
playout = telemetry.GetBackendPlayoutSnapshot();
Expect(playout.systemFramePoolFree == 1, "try system-memory playout stores free frame count");
Expect(playout.systemFramePoolReady == 0, "try system-memory playout stores ready frame count");
Expect(playout.systemFramePoolScheduled == 2, "try system-memory playout stores scheduled frame count");
Expect(playout.systemFrameUnderrunCount == 7, "try system-memory playout stores underrun count");
Expect(playout.systemFrameRepeatCount == 8, "try system-memory playout stores repeat count");
Expect(playout.systemFrameDropCount == 9, "try system-memory playout stores drop count");
Expect(playout.systemFrameAgeAtScheduleMilliseconds == 0.0, "system-memory playout clamps negative schedule age");
Expect(playout.systemFrameAgeAtCompletionMilliseconds == 0.0, "system-memory playout clamps negative completion age");
}
} }
int main() int main()
@@ -210,6 +238,7 @@ int main()
TestPersistenceWriteHealth(); TestPersistenceWriteHealth();
TestBackendPlayoutHealth(); TestBackendPlayoutHealth();
TestOutputRenderPipelineTiming(); TestOutputRenderPipelineTiming();
TestSystemMemoryPlayoutStats();
if (gFailures != 0) if (gFailures != 0)
{ {

View File

@@ -5,6 +5,7 @@
namespace namespace
{ {
int gFailures = 0; int gFailures = 0;
int gReleasedFrames = 0;
void Expect(bool condition, const char* message) void Expect(bool condition, const char* message)
{ {
@@ -23,6 +24,22 @@ RenderOutputFrame MakeFrame(uint64_t index)
return frame; return frame;
} }
// Release hook used by the owned-frame tests: counts each frame that still
// carries a native frame pointer and clears the pointer so the same frame is
// not counted again.
void CountReleasedFrame(VideoIOOutputFrame& frame)
{
    if (frame.nativeFrame == nullptr)
        return;

    ++gReleasedFrames;
    frame.nativeFrame = nullptr;
}
// Builds a test frame via MakeFrame() and attaches CountReleasedFrame as its
// release hook so tests can observe when the frame is released.
RenderOutputFrame MakeOwnedFrame(uint64_t index)
{
    RenderOutputFrame ownedFrame = MakeFrame(index);
    ownedFrame.releaseFrame = CountReleasedFrame;
    return ownedFrame;
}
void TestQueuePreservesOrdering() void TestQueuePreservesOrdering()
{ {
VideoPlayoutPolicy policy; VideoPlayoutPolicy policy;
@@ -58,6 +75,25 @@ void TestBoundedQueueDropsOldestFrame()
Expect(frame.frameIndex == 2, "oldest frame was dropped when queue overflowed"); Expect(frame.frameIndex == 2, "oldest frame was dropped when queue overflowed");
} }
void TestOverflowReleasesDroppedFrame()
{
gReleasedFrames = 0;
VideoPlayoutPolicy policy;
policy.targetReadyFrames = 1;
policy.maxReadyFrames = 1;
RenderOutputQueue queue(policy);
queue.Push(MakeOwnedFrame(1));
queue.Push(MakeOwnedFrame(2));
Expect(gReleasedFrames == 1, "overflow releases dropped ready frame");
RenderOutputFrame frame;
Expect(queue.TryPop(frame), "newest owned frame remains queued");
Expect(frame.frameIndex == 2, "overflow keeps newest owned frame");
Expect(gReleasedFrames == 1, "pop transfers ownership without releasing");
}
void TestUnderrunIsCounted() void TestUnderrunIsCounted()
{ {
RenderOutputQueue queue; RenderOutputQueue queue;
@@ -90,14 +126,53 @@ void TestConfigureShrinksDepthToNewCapacity()
Expect(queue.TryPop(frame), "trimmed queue still has newest frame"); Expect(queue.TryPop(frame), "trimmed queue still has newest frame");
Expect(frame.frameIndex == 3, "configure keeps newest ready frame"); Expect(frame.frameIndex == 3, "configure keeps newest ready frame");
} }
void TestConfigureReleasesTrimmedFrames()
{
gReleasedFrames = 0;
VideoPlayoutPolicy policy;
policy.maxReadyFrames = 3;
RenderOutputQueue queue(policy);
queue.Push(MakeOwnedFrame(1));
queue.Push(MakeOwnedFrame(2));
queue.Push(MakeOwnedFrame(3));
VideoPlayoutPolicy smallerPolicy;
smallerPolicy.targetReadyFrames = 1;
smallerPolicy.maxReadyFrames = 1;
queue.Configure(smallerPolicy);
Expect(gReleasedFrames == 2, "configure releases trimmed ready frames");
RenderOutputFrame frame;
Expect(queue.TryPop(frame), "trimmed owned queue still has newest frame");
Expect(frame.frameIndex == 3, "configure keeps newest owned frame after release");
}
void TestClearReleasesQueuedFrames()
{
gReleasedFrames = 0;
RenderOutputQueue queue;
queue.Push(MakeOwnedFrame(1));
queue.Push(MakeOwnedFrame(2));
queue.Clear();
RenderOutputQueueMetrics metrics = queue.GetMetrics();
Expect(metrics.depth == 0, "clear empties ready queue");
Expect(gReleasedFrames == 2, "clear releases queued ready frames");
}
} }
int main() int main()
{ {
TestQueuePreservesOrdering(); TestQueuePreservesOrdering();
TestBoundedQueueDropsOldestFrame(); TestBoundedQueueDropsOldestFrame();
TestOverflowReleasesDroppedFrame();
TestUnderrunIsCounted(); TestUnderrunIsCounted();
TestConfigureShrinksDepthToNewCapacity(); TestConfigureShrinksDepthToNewCapacity();
TestConfigureReleasesTrimmedFrames();
TestClearReleasesQueuedFrames();
if (gFailures != 0) if (gFailures != 0)
{ {

View File

@@ -0,0 +1,170 @@
#include "SystemOutputFramePool.h"
#include <cstdint>
#include <iostream>
namespace
{
// Number of expectations that have failed so far in this test binary.
int gFailures = 0;

// Minimal assertion helper: a true condition is a no-op; a false condition
// prints "FAIL: <message>" to stderr and bumps the failure counter so the
// remaining tests keep running.
void Expect(bool condition, const char* message)
{
    if (!condition)
    {
        std::cerr << "FAIL: " << message << "\n";
        ++gFailures;
    }
}
// Returns a small 4x3 BGRA8 pool configuration with the requested slot
// capacity (two slots when no capacity is given).
SystemOutputFramePoolConfig MakeConfig(std::size_t capacity = 2)
{
    SystemOutputFramePoolConfig poolConfig;
    poolConfig.capacity = capacity;
    poolConfig.pixelFormat = VideoIOPixelFormat::Bgra8;
    poolConfig.width = 4;
    poolConfig.height = 3;
    return poolConfig;
}
void TestAcquireHonorsCapacityAndFrameShape()
{
SystemOutputFramePool pool(MakeConfig(2));
OutputFrameSlot first;
OutputFrameSlot second;
OutputFrameSlot third;
Expect(pool.AcquireFreeSlot(first), "first slot can be acquired");
Expect(pool.AcquireFreeSlot(second), "second slot can be acquired");
Expect(!pool.AcquireFreeSlot(third), "fixed capacity rejects third acquire");
Expect(first.frame.bytes != nullptr, "acquired slot has system memory");
Expect(first.frame.nativeBuffer == first.frame.bytes, "native buffer points at system memory");
Expect(first.frame.nativeFrame == nullptr, "system frame has no native frame");
Expect(first.frame.width == 4, "frame width is configured");
Expect(first.frame.height == 3, "frame height is configured");
Expect(first.frame.rowBytes == 16, "BGRA8 row bytes are inferred");
Expect(first.frame.pixelFormat == VideoIOPixelFormat::Bgra8, "BGRA8 is the default output format");
Expect(first.frame.bytes != second.frame.bytes, "each slot owns distinct memory");
SystemOutputFramePoolMetrics metrics = pool.GetMetrics();
Expect(metrics.freeCount == 0, "all slots are in use");
Expect(metrics.acquiredCount == 2, "acquired slots are counted");
Expect(metrics.acquireMissCount == 1, "capacity miss is counted");
}
void TestReadySlotsAreConsumedFifo()
{
SystemOutputFramePool pool(MakeConfig(2));
OutputFrameSlot first;
OutputFrameSlot second;
Expect(pool.AcquireFreeSlot(first), "first FIFO slot can be acquired");
Expect(pool.AcquireFreeSlot(second), "second FIFO slot can be acquired");
Expect(pool.PublishReadySlot(first), "first FIFO slot can be published");
Expect(pool.PublishReadySlot(second), "second FIFO slot can be published");
OutputFrameSlot consumed;
Expect(pool.ConsumeReadySlot(consumed), "first ready slot can be consumed");
Expect(consumed.index == first.index, "first published slot is consumed first");
Expect(pool.MarkScheduled(consumed), "consumed slot can be marked scheduled");
Expect(pool.ReleaseScheduledSlot(consumed), "scheduled slot can be released");
Expect(pool.ConsumeReadySlot(consumed), "second ready slot can be consumed");
Expect(consumed.index == second.index, "second published slot is consumed second");
Expect(pool.ReleaseSlot(consumed), "consumed slot can be released without scheduling");
SystemOutputFramePoolMetrics metrics = pool.GetMetrics();
Expect(metrics.freeCount == 2, "released slots return to free pool");
Expect(metrics.readyCount == 0, "ready queue is empty after consumption");
}
void TestReadySlotCanBeScheduledByBuffer()
{
SystemOutputFramePool pool(MakeConfig(1));
OutputFrameSlot slot;
Expect(pool.AcquireFreeSlot(slot), "buffer schedule slot can be acquired");
void* bytes = slot.frame.bytes;
Expect(pool.PublishReadySlot(slot), "buffer schedule slot can be published");
Expect(pool.MarkScheduledByBuffer(bytes), "ready slot can be marked scheduled by buffer");
SystemOutputFramePoolMetrics metrics = pool.GetMetrics();
Expect(metrics.readyCount == 0, "scheduled-by-buffer removes slot from ready queue");
Expect(metrics.scheduledCount == 1, "scheduled-by-buffer counts scheduled slot");
Expect(pool.ReleaseSlotByBuffer(bytes), "scheduled slot can be released by buffer");
metrics = pool.GetMetrics();
Expect(metrics.freeCount == 1, "released-by-buffer slot returns to free pool");
}
void TestInvalidTransitionsAreRejected()
{
SystemOutputFramePool pool(MakeConfig(1));
OutputFrameSlot slot;
Expect(pool.AcquireFreeSlot(slot), "transition slot can be acquired");
Expect(!pool.MarkScheduled(slot), "acquired slot cannot be marked scheduled");
Expect(pool.PublishReadySlot(slot), "acquired slot can be published");
Expect(!pool.PublishReadySlot(slot), "ready slot cannot be published twice");
Expect(pool.ReleaseSlot(slot), "ready slot can be released to free");
Expect(!pool.ReleaseSlot(slot), "free slot cannot be released again");
OutputFrameSlot next;
Expect(pool.AcquireFreeSlot(next), "slot can be reacquired after release");
Expect(next.index == slot.index, "same storage slot can be reused");
Expect(next.generation != slot.generation, "stale handles are invalidated on reacquire");
Expect(!pool.PublishReadySlot(slot), "stale handle cannot publish reacquired slot");
}
void TestPixelFormatAwareSizing()
{
SystemOutputFramePoolConfig config;
config.width = 7;
config.height = 2;
config.pixelFormat = VideoIOPixelFormat::V210;
config.capacity = 1;
SystemOutputFramePool pool(config);
OutputFrameSlot slot;
Expect(pool.AcquireFreeSlot(slot), "v210 slot can be acquired");
Expect(slot.frame.pixelFormat == VideoIOPixelFormat::V210, "slot keeps configured pixel format");
Expect(slot.frame.rowBytes == static_cast<long>(MinimumV210RowBytes(config.width)), "v210 row bytes are inferred");
SystemOutputFramePoolConfig explicitConfig = config;
explicitConfig.pixelFormat = VideoIOPixelFormat::Uyvy8;
explicitConfig.rowBytes = 64;
pool.Configure(explicitConfig);
Expect(pool.AcquireFreeSlot(slot), "explicit row-byte slot can be acquired");
Expect(slot.frame.pixelFormat == VideoIOPixelFormat::Uyvy8, "slot keeps reconfigured pixel format");
Expect(slot.frame.rowBytes == 64, "explicit row bytes are preserved");
}
void TestEmptyReadyQueueUnderrunIsCounted()
{
SystemOutputFramePool pool(MakeConfig(1));
OutputFrameSlot slot;
Expect(!pool.ConsumeReadySlot(slot), "empty ready queue cannot be consumed");
SystemOutputFramePoolMetrics metrics = pool.GetMetrics();
Expect(metrics.readyUnderrunCount == 1, "ready underrun is counted");
}
}
// Runs every SystemOutputFramePool test in order and reports the outcome:
// exit code 0 when no expectation failed, 1 otherwise.
int main()
{
    TestAcquireHonorsCapacityAndFrameShape();
    TestReadySlotsAreConsumedFifo();
    TestReadySlotCanBeScheduledByBuffer();
    TestInvalidTransitionsAreRejected();
    TestPixelFormatAwareSizing();
    TestEmptyReadyQueueUnderrunIsCounted();

    if (gFailures == 0)
    {
        std::cout << "SystemOutputFramePool tests passed.\n";
        return 0;
    }

    std::cerr << gFailures << " system output frame pool test failure(s).\n";
    return 1;
}