Performance chasing
All checks were successful
CI / React UI Build (push) Successful in 10s
CI / Native Windows Build And Tests (push) Successful in 2m51s
CI / Windows Release Package (push) Successful in 2m55s

This commit is contained in:
Aiden
2026-05-11 23:10:45 +10:00
parent c5cead6003
commit a434a88108
18 changed files with 1115 additions and 82 deletions

44
.vscode/launch.json vendored
View File

@@ -17,6 +17,50 @@
"moduleLoad": true "moduleLoad": true
}, },
"preLaunchTask": "Build LoopThroughWithOpenGLCompositing Debug x64" "preLaunchTask": "Build LoopThroughWithOpenGLCompositing Debug x64"
},
{
"name": "Debug LoopThroughWithOpenGLCompositing - sync readback experiment",
"type": "cppvsdbg",
"request": "launch",
"program": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug\\LoopThroughWithOpenGLCompositing.exe",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
"environment": [
{
"name": "VST_OUTPUT_READBACK_MODE",
"value": "sync"
}
],
"console": "internalConsole",
"symbolSearchPath": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
"requireExactSource": true,
"logging": {
"moduleLoad": true
},
"preLaunchTask": "Build LoopThroughWithOpenGLCompositing Debug x64"
},
{
"name": "Debug LoopThroughWithOpenGLCompositing - cached output experiment",
"type": "cppvsdbg",
"request": "launch",
"program": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug\\LoopThroughWithOpenGLCompositing.exe",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
"environment": [
{
"name": "VST_OUTPUT_READBACK_MODE",
"value": "cached_only"
}
],
"console": "internalConsole",
"symbolSearchPath": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
"requireExactSource": true,
"logging": {
"moduleLoad": true
},
"preLaunchTask": "Build LoopThroughWithOpenGLCompositing Debug x64"
} }
] ]
} }

View File

@@ -18,6 +18,7 @@ RenderEngine::RenderEngine(
mRenderPass(mRenderer), mRenderPass(mRenderer),
mRenderPipeline(mRenderer, runtimeSnapshotProvider, healthTelemetry, std::move(renderEffect), std::move(screenshotReady), std::move(previewPaint)), mRenderPipeline(mRenderer, runtimeSnapshotProvider, healthTelemetry, std::move(renderEffect), std::move(screenshotReady), std::move(previewPaint)),
mShaderPrograms(mRenderer, runtimeSnapshotProvider), mShaderPrograms(mRenderer, runtimeSnapshotProvider),
mHealthTelemetry(healthTelemetry),
mHdc(hdc), mHdc(hdc),
mHglrc(hglrc), mHglrc(hglrc),
mFrameStateResolver(runtimeSnapshotProvider) mFrameStateResolver(runtimeSnapshotProvider)
@@ -546,7 +547,11 @@ bool RenderEngine::RequestOutputFrame(const RenderPipelineFrameContext& context,
{ {
if (mRenderThreadRunning) if (mRenderThreadRunning)
{ {
return TryInvokeOnRenderThread("output-render", [this, &context, &outputFrame]() { const auto queuedAt = std::chrono::steady_clock::now();
return TryInvokeOnRenderThread("output-render", [this, &context, &outputFrame, queuedAt]() {
const auto startedAt = std::chrono::steady_clock::now();
const double queueWaitMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(startedAt - queuedAt).count();
mHealthTelemetry.TryRecordOutputRenderQueueWait(queueWaitMilliseconds);
mRenderCommandQueue.RequestOutputFrame({ context.videoState, context.completion }); mRenderCommandQueue.RequestOutputFrame({ context.videoState, context.completion });
RenderOutputFrameRequest request; RenderOutputFrameRequest request;
return mRenderCommandQueue.TryTakeOutputFrame(request) && return mRenderCommandQueue.TryTakeOutputFrame(request) &&

View File

@@ -209,6 +209,7 @@ private:
OpenGLRenderPass mRenderPass; OpenGLRenderPass mRenderPass;
OpenGLRenderPipeline mRenderPipeline; OpenGLRenderPipeline mRenderPipeline;
OpenGLShaderPrograms mShaderPrograms; OpenGLShaderPrograms mShaderPrograms;
HealthTelemetry& mHealthTelemetry;
HDC mHdc; HDC mHdc;
HGLRC mHglrc; HGLRC mHglrc;

View File

@@ -164,6 +164,9 @@ error:
void OpenGLComposite::paintGL(bool force) void OpenGLComposite::paintGL(bool force)
{ {
if (mRuntimeUpdateController)
mRuntimeUpdateController->ProcessRuntimeWork();
if (!force) if (!force)
{ {
if (IsIconic(hGLWnd)) if (IsIconic(hGLWnd))
@@ -171,6 +174,12 @@ void OpenGLComposite::paintGL(bool force)
} }
const unsigned previewFps = mRuntimeStore ? mRuntimeStore->GetConfiguredPreviewFps() : 30u; const unsigned previewFps = mRuntimeStore ? mRuntimeStore->GetConfiguredPreviewFps() : 30u;
if (!force && mVideoBackend && mVideoBackend->ShouldPrioritizeOutputOverPreview())
{
ValidateRect(hGLWnd, NULL);
return;
}
if (!mRenderEngine->TryPresentPreview(force, previewFps, mVideoBackend->OutputFrameWidth(), mVideoBackend->OutputFrameHeight())) if (!mRenderEngine->TryPresentPreview(force, previewFps, mVideoBackend->OutputFrameWidth(), mVideoBackend->OutputFrameHeight()))
{ {
ValidateRect(hGLWnd, NULL); ValidateRect(hGLWnd, NULL);
@@ -261,6 +270,9 @@ bool OpenGLComposite::Start()
if (!mRenderEngine->StartRenderThread()) if (!mRenderEngine->StartRenderThread())
return false; return false;
if (mRuntimeUpdateController)
mRuntimeUpdateController->ProcessRuntimeWork();
if (mVideoBackend->Start()) if (mVideoBackend->Start())
return true; return true;
@@ -351,9 +363,6 @@ bool OpenGLComposite::RequestScreenshot(std::string& error)
void OpenGLComposite::renderEffect() void OpenGLComposite::renderEffect()
{ {
if (mRuntimeUpdateController)
mRuntimeUpdateController->ProcessRuntimeWork();
const RenderFrameInput frameInput = BuildRenderFrameInput(); const RenderFrameInput frameInput = BuildRenderFrameInput();
RenderFrame(frameInput); RenderFrame(frameInput);
} }

View File

@@ -8,7 +8,9 @@
#include <cstring> #include <cstring>
#include <chrono> #include <chrono>
#include <cstdlib>
#include <gl/gl.h> #include <gl/gl.h>
#include <string>
OpenGLRenderPipeline::OpenGLRenderPipeline( OpenGLRenderPipeline::OpenGLRenderPipeline(
OpenGLRenderer& renderer, OpenGLRenderer& renderer,
@@ -22,7 +24,8 @@ OpenGLRenderPipeline::OpenGLRenderPipeline(
mHealthTelemetry(healthTelemetry), mHealthTelemetry(healthTelemetry),
mRenderEffect(renderEffect), mRenderEffect(renderEffect),
mOutputReady(outputReady), mOutputReady(outputReady),
mPaint(paint) mPaint(paint),
mOutputReadbackMode(ReadOutputReadbackModeFromEnvironment())
{ {
} }
@@ -53,9 +56,22 @@ bool OpenGLRenderPipeline::RenderFrame(const RenderPipelineFrameContext& context
mHealthTelemetry.TryRecordPerformanceStats(state.frameBudgetMilliseconds, renderMilliseconds); mHealthTelemetry.TryRecordPerformanceStats(state.frameBudgetMilliseconds, renderMilliseconds);
mRuntimeSnapshotProvider.AdvanceFrame(); mRuntimeSnapshotProvider.AdvanceFrame();
ReadOutputFrame(state, outputFrame); OutputReadbackTiming readbackTiming = ReadOutputFrame(state, outputFrame);
if (mPaint) mHealthTelemetry.TryRecordOutputRenderPipelineTiming(
mPaint(); renderMilliseconds,
readbackTiming.fenceWaitMilliseconds,
readbackTiming.mapMilliseconds,
readbackTiming.copyMilliseconds,
readbackTiming.cachedCopyMilliseconds,
readbackTiming.asyncQueueMilliseconds,
readbackTiming.asyncQueueBufferMilliseconds,
readbackTiming.asyncQueueSetupMilliseconds,
readbackTiming.asyncQueueReadPixelsMilliseconds,
readbackTiming.asyncQueueFenceMilliseconds,
readbackTiming.syncReadMilliseconds,
readbackTiming.asyncReadbackMissed,
readbackTiming.cachedFallbackUsed,
readbackTiming.syncFallbackUsed);
return true; return true;
} }
@@ -151,8 +167,9 @@ void OpenGLRenderPipeline::FlushAsyncReadbackPipeline()
mAsyncReadbackReadIndex = 0; mAsyncReadbackReadIndex = 0;
} }
void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state) bool OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state, OutputReadbackTiming& timing)
{ {
const auto queueStartTime = std::chrono::steady_clock::now();
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10; const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
const std::size_t requiredBytes = static_cast<std::size_t>(state.outputFrameRowBytes) * state.outputFrameSize.height; const std::size_t requiredBytes = static_cast<std::size_t>(state.outputFrameRowBytes) * state.outputFrameSize.height;
const GLenum format = usePackedOutput ? GL_RGBA : GL_BGRA; const GLenum format = usePackedOutput ? GL_RGBA : GL_BGRA;
@@ -161,8 +178,16 @@ void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state)
const GLsizei readWidth = static_cast<GLsizei>(usePackedOutput ? state.outputPackTextureWidth : state.outputFrameSize.width); const GLsizei readWidth = static_cast<GLsizei>(usePackedOutput ? state.outputPackTextureWidth : state.outputFrameSize.width);
const GLsizei readHeight = static_cast<GLsizei>(state.outputFrameSize.height); const GLsizei readHeight = static_cast<GLsizei>(state.outputFrameSize.height);
const auto finishTiming = [&timing, queueStartTime]() {
const auto queueEndTime = std::chrono::steady_clock::now();
timing.asyncQueueMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(queueEndTime - queueStartTime).count();
};
if (requiredBytes == 0) if (requiredBytes == 0)
return; {
finishTiming();
return false;
}
if (mAsyncReadbackBytes != requiredBytes if (mAsyncReadbackBytes != requiredBytes
|| mAsyncReadbackFormat != format || mAsyncReadbackFormat != format
@@ -173,30 +198,50 @@ void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state)
mAsyncReadbackType = type; mAsyncReadbackType = type;
mAsyncReadbackFramebuffer = framebuffer; mAsyncReadbackFramebuffer = framebuffer;
if (!EnsureAsyncReadbackBuffers(requiredBytes)) if (!EnsureAsyncReadbackBuffers(requiredBytes))
return; {
finishTiming();
return false;
}
} }
AsyncReadbackSlot& slot = mAsyncReadbackSlots[mAsyncReadbackWriteIndex]; AsyncReadbackSlot& slot = mAsyncReadbackSlots[mAsyncReadbackWriteIndex];
if (slot.fence != nullptr) if (slot.inFlight)
{ {
glDeleteSync(slot.fence); finishTiming();
slot.fence = nullptr; return false;
} }
auto stageStartTime = std::chrono::steady_clock::now();
glPixelStorei(GL_PACK_ALIGNMENT, 4); glPixelStorei(GL_PACK_ALIGNMENT, 4);
glPixelStorei(GL_PACK_ROW_LENGTH, 0); glPixelStorei(GL_PACK_ROW_LENGTH, 0);
glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer); glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer); glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
auto stageEndTime = std::chrono::steady_clock::now();
timing.asyncQueueSetupMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
stageStartTime = std::chrono::steady_clock::now();
glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(requiredBytes), nullptr, GL_STREAM_READ); glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(requiredBytes), nullptr, GL_STREAM_READ);
stageEndTime = std::chrono::steady_clock::now();
timing.asyncQueueBufferMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
stageStartTime = std::chrono::steady_clock::now();
glReadPixels(0, 0, readWidth, readHeight, format, type, nullptr); glReadPixels(0, 0, readWidth, readHeight, format, type, nullptr);
stageEndTime = std::chrono::steady_clock::now();
timing.asyncQueueReadPixelsMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
stageStartTime = std::chrono::steady_clock::now();
slot.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); slot.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
stageEndTime = std::chrono::steady_clock::now();
timing.asyncQueueFenceMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
slot.inFlight = slot.fence != nullptr; slot.inFlight = slot.fence != nullptr;
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
mAsyncReadbackWriteIndex = (mAsyncReadbackWriteIndex + 1) % mAsyncReadbackSlots.size(); mAsyncReadbackWriteIndex = (mAsyncReadbackWriteIndex + 1) % mAsyncReadbackSlots.size();
finishTiming();
return slot.inFlight;
} }
bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds) bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds, OutputReadbackTiming& timing)
{ {
if (mAsyncReadbackBytes == 0 || outputFrame.bytes == nullptr) if (mAsyncReadbackBytes == 0 || outputFrame.bytes == nullptr)
return false; return false;
@@ -206,15 +251,24 @@ bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFra
return false; return false;
const GLenum waitFlags = timeoutNanoseconds > 0 ? GL_SYNC_FLUSH_COMMANDS_BIT : 0; const GLenum waitFlags = timeoutNanoseconds > 0 ? GL_SYNC_FLUSH_COMMANDS_BIT : 0;
const auto waitStartTime = std::chrono::steady_clock::now();
const GLenum waitResult = glClientWaitSync(slot.fence, waitFlags, timeoutNanoseconds); const GLenum waitResult = glClientWaitSync(slot.fence, waitFlags, timeoutNanoseconds);
const auto waitEndTime = std::chrono::steady_clock::now();
timing.fenceWaitMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(waitEndTime - waitStartTime).count();
if (waitResult != GL_ALREADY_SIGNALED && waitResult != GL_CONDITION_SATISFIED) if (waitResult != GL_ALREADY_SIGNALED && waitResult != GL_CONDITION_SATISFIED)
{
timing.asyncReadbackMissed = true;
return false; return false;
}
glDeleteSync(slot.fence); glDeleteSync(slot.fence);
slot.fence = nullptr; slot.fence = nullptr;
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer); glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
const auto mapStartTime = std::chrono::steady_clock::now();
void* mappedBytes = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); void* mappedBytes = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
const auto mapEndTime = std::chrono::steady_clock::now();
timing.mapMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(mapEndTime - mapStartTime).count();
if (mappedBytes == nullptr) if (mappedBytes == nullptr)
{ {
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -223,7 +277,10 @@ bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFra
return false; return false;
} }
const auto copyStartTime = std::chrono::steady_clock::now();
std::memcpy(outputFrame.bytes, mappedBytes, slot.sizeBytes); std::memcpy(outputFrame.bytes, mappedBytes, slot.sizeBytes);
const auto copyEndTime = std::chrono::steady_clock::now();
timing.copyMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(copyEndTime - copyStartTime).count();
glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -243,8 +300,26 @@ void OpenGLRenderPipeline::CacheOutputFrame(const VideoIOOutputFrame& outputFram
std::memcpy(mCachedOutputFrame.data(), outputFrame.bytes, byteCount); std::memcpy(mCachedOutputFrame.data(), outputFrame.bytes, byteCount);
} }
void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes) bool OpenGLRenderPipeline::TryCopyCachedOutputFrame(VideoIOOutputFrame& outputFrame, OutputReadbackTiming& timing) const
{ {
// TryCopyCachedOutputFrame: fills outputFrame from mCachedOutputFrame.
// Returns true when the copy happened; in that case the memcpy duration is
// added to timing.cachedCopyMilliseconds and timing.cachedFallbackUsed is set.
// Returns false, leaving outputFrame and timing untouched, when the
// destination is unusable or the cache does not match its size.

// Reject a destination with no buffer or a degenerate geometry.
if (outputFrame.bytes == nullptr || outputFrame.height == 0 || outputFrame.rowBytes <= 0)
return false;
const std::size_t byteCount = static_cast<std::size_t>(outputFrame.rowBytes) * outputFrame.height;
// The cache is only usable when it holds exactly rowBytes * height bytes;
// an empty cache or one of a different size is treated as a miss.
if (mCachedOutputFrame.size() != byteCount)
return false;
// Time only the copy itself.
const auto copyStartTime = std::chrono::steady_clock::now();
std::memcpy(outputFrame.bytes, mCachedOutputFrame.data(), byteCount);
const auto copyEndTime = std::chrono::steady_clock::now();
timing.cachedCopyMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(copyEndTime - copyStartTime).count();
timing.cachedFallbackUsed = true;
return true;
}
void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes, OutputReadbackTiming& timing)
{
const auto readStartTime = std::chrono::steady_clock::now();
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10; const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
glPixelStorei(GL_PACK_ALIGNMENT, 4); glPixelStorei(GL_PACK_ALIGNMENT, 4);
@@ -259,24 +334,78 @@ void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& stat
glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputFramebuffer()); glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputFramebuffer());
glReadPixels(0, 0, state.outputFrameSize.width, state.outputFrameSize.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, destinationBytes); glReadPixels(0, 0, state.outputFrameSize.width, state.outputFrameSize.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, destinationBytes);
} }
const auto readEndTime = std::chrono::steady_clock::now();
timing.syncReadMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(readEndTime - readStartTime).count();
timing.syncFallbackUsed = true;
} }
void OpenGLRenderPipeline::ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame) OpenGLRenderPipeline::OutputReadbackTiming OpenGLRenderPipeline::ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame)
{ {
if (TryConsumeAsyncReadback(outputFrame, 500000)) OutputReadbackTiming timing;
if (mOutputReadbackMode == OutputReadbackMode::Synchronous)
{ {
QueueAsyncReadback(state); if (outputFrame.bytes != nullptr)
return; {
ReadOutputFrameSynchronously(state, outputFrame.bytes, timing);
CacheOutputFrame(outputFrame);
}
return timing;
} }
// If async readback misses the playout deadline, prefer a fresh synchronous if (mOutputReadbackMode == OutputReadbackMode::CachedOnly)
// frame over reusing stale cached output, then restart the async pipeline.
if (outputFrame.bytes != nullptr)
{ {
ReadOutputFrameSynchronously(state, outputFrame.bytes); if (TryCopyCachedOutputFrame(outputFrame, timing))
return timing;
if (outputFrame.bytes != nullptr)
{
ReadOutputFrameSynchronously(state, outputFrame.bytes, timing);
CacheOutputFrame(outputFrame);
}
return timing;
}
if (TryConsumeAsyncReadback(outputFrame, 500000, timing))
{
(void)QueueAsyncReadback(state, timing);
return timing;
}
if (TryCopyCachedOutputFrame(outputFrame, timing))
{
(void)QueueAsyncReadback(state, timing);
return timing;
}
// Bootstrap only: until the first async readback has produced cached output,
// use one synchronous readback so DeckLink has a valid frame to schedule.
if (outputFrame.bytes != nullptr && mCachedOutputFrame.empty())
{
ReadOutputFrameSynchronously(state, outputFrame.bytes, timing);
CacheOutputFrame(outputFrame); CacheOutputFrame(outputFrame);
} }
FlushAsyncReadbackPipeline(); FlushAsyncReadbackPipeline();
QueueAsyncReadback(state); (void)QueueAsyncReadback(state, timing);
return timing;
}
// Chooses the output readback mode from the VST_OUTPUT_READBACK_MODE
// environment variable.
// Recognized values (exact, case-sensitive string match):
//   "async_pbo"   -> OutputReadbackMode::AsyncPbo
//   "sync"        -> OutputReadbackMode::Synchronous
//   "cached_only" -> OutputReadbackMode::CachedOnly
// Returns OutputReadbackMode::AsyncPbo when the lookup fails, the variable is
// not set, or its value is not one of the strings above.
OpenGLRenderPipeline::OutputReadbackMode OpenGLRenderPipeline::ReadOutputReadbackModeFromEnvironment()
{
char* mode = nullptr;
std::size_t modeSize = 0;
// A non-zero result or a null buffer both mean there is no value to parse.
if (_dupenv_s(&mode, &modeSize, "VST_OUTPUT_READBACK_MODE") != 0 || mode == nullptr)
return OutputReadbackMode::AsyncPbo;
// Copy the value into a std::string so the buffer handed back by _dupenv_s
// can be released immediately, before any of the returns below.
const std::string modeValue(mode);
std::free(mode);
if (modeValue == "async_pbo")
return OutputReadbackMode::AsyncPbo;
if (modeValue == "sync")
return OutputReadbackMode::Synchronous;
if (modeValue == "cached_only")
return OutputReadbackMode::CachedOnly;
// Unrecognized value: fall back to the same mode used when the variable is unset.
return OutputReadbackMode::AsyncPbo;
} }

View File

@@ -36,6 +36,13 @@ public:
bool RenderFrame(const RenderPipelineFrameContext& context, VideoIOOutputFrame& outputFrame); bool RenderFrame(const RenderPipelineFrameContext& context, VideoIOOutputFrame& outputFrame);
private: private:
// How the rendered output frame is brought back from the GPU each frame.
// The mode is chosen once, at construction, from the
// VST_OUTPUT_READBACK_MODE environment variable.
enum class OutputReadbackMode
{
    // Consume a previously queued asynchronous readback and queue the next
    // one. Used for "async_pbo" and whenever the variable is unset or holds
    // an unrecognized value.
    AsyncPbo = 0,
    // Read the frame synchronously every frame ("sync").
    Synchronous = 1,
    // Reuse the cached output frame when one of the right size exists,
    // otherwise read synchronously and cache the result ("cached_only").
    CachedOnly = 2
};
struct AsyncReadbackSlot struct AsyncReadbackSlot
{ {
GLuint pixelPackBuffer = 0; GLuint pixelPackBuffer = 0;
@@ -44,15 +51,34 @@ private:
bool inFlight = false; bool inFlight = false;
}; };
// Timing breakdown and fallback flags collected while producing one output
// frame. Every duration is in milliseconds and starts at zero; the readback
// helpers add to the fields they measure, so a single instance accumulates
// everything that happened during one ReadOutputFrame call.
struct OutputReadbackTiming
{
    // --- Consuming a previously queued asynchronous readback ---
    double fenceWaitMilliseconds{0.0};            // time spent in glClientWaitSync
    double mapMilliseconds{0.0};                  // time spent in glMapBuffer
    double copyMilliseconds{0.0};                 // memcpy out of the mapped buffer

    // --- Reusing the cached output frame ---
    double cachedCopyMilliseconds{0.0};           // memcpy out of the cached frame

    // --- Queueing the next asynchronous readback ---
    double asyncQueueMilliseconds{0.0};           // whole QueueAsyncReadback call
    double asyncQueueBufferMilliseconds{0.0};     // glBufferData
    double asyncQueueSetupMilliseconds{0.0};      // pixel-store state and framebuffer/buffer binds
    double asyncQueueReadPixelsMilliseconds{0.0}; // glReadPixels into the pack buffer
    double asyncQueueFenceMilliseconds{0.0};      // glFenceSync

    // --- Synchronous readback ---
    double syncReadMilliseconds{0.0};             // whole ReadOutputFrameSynchronously call

    // --- What happened this frame ---
    bool asyncReadbackMissed{false};              // fence wait ended without the fence being signaled
    bool cachedFallbackUsed{false};               // the frame was filled from the cached copy
    bool syncFallbackUsed{false};                 // a synchronous readback was performed
};
bool EnsureAsyncReadbackBuffers(std::size_t requiredBytes); bool EnsureAsyncReadbackBuffers(std::size_t requiredBytes);
void ResetAsyncReadbackState(); void ResetAsyncReadbackState();
void FlushAsyncReadbackPipeline(); void FlushAsyncReadbackPipeline();
void QueueAsyncReadback(const VideoIOState& state); bool QueueAsyncReadback(const VideoIOState& state, OutputReadbackTiming& timing);
bool TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds); bool TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds, OutputReadbackTiming& timing);
void CacheOutputFrame(const VideoIOOutputFrame& outputFrame); void CacheOutputFrame(const VideoIOOutputFrame& outputFrame);
void ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes); bool TryCopyCachedOutputFrame(VideoIOOutputFrame& outputFrame, OutputReadbackTiming& timing) const;
void ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes, OutputReadbackTiming& timing);
void PackOutputFor10Bit(const VideoIOState& state); void PackOutputFor10Bit(const VideoIOState& state);
void ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame); OutputReadbackTiming ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame);
static OutputReadbackMode ReadOutputReadbackModeFromEnvironment();
OpenGLRenderer& mRenderer; OpenGLRenderer& mRenderer;
RuntimeSnapshotProvider& mRuntimeSnapshotProvider; RuntimeSnapshotProvider& mRuntimeSnapshotProvider;
@@ -60,6 +86,7 @@ private:
RenderEffectCallback mRenderEffect; RenderEffectCallback mRenderEffect;
OutputReadyCallback mOutputReady; OutputReadyCallback mOutputReady;
PaintCallback mPaint; PaintCallback mPaint;
OutputReadbackMode mOutputReadbackMode = OutputReadbackMode::AsyncPbo;
std::array<AsyncReadbackSlot, 3> mAsyncReadbackSlots; std::array<AsyncReadbackSlot, 3> mAsyncReadbackSlots;
std::size_t mAsyncReadbackWriteIndex = 0; std::size_t mAsyncReadbackWriteIndex = 0;
std::size_t mAsyncReadbackReadIndex = 0; std::size_t mAsyncReadbackReadIndex = 0;

View File

@@ -95,6 +95,24 @@ JsonValue RuntimeStatePresenter::BuildRuntimeStateValue(const RuntimeStore& runt
outputRender.set("renderMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderMilliseconds)); outputRender.set("renderMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderMilliseconds));
outputRender.set("smoothedRenderMs", JsonValue(telemetrySnapshot.backendPlayout.smoothedOutputRenderMilliseconds)); outputRender.set("smoothedRenderMs", JsonValue(telemetrySnapshot.backendPlayout.smoothedOutputRenderMilliseconds));
outputRender.set("maxRenderMs", JsonValue(telemetrySnapshot.backendPlayout.maxOutputRenderMilliseconds)); outputRender.set("maxRenderMs", JsonValue(telemetrySnapshot.backendPlayout.maxOutputRenderMilliseconds));
outputRender.set("acquireFrameMs", JsonValue(telemetrySnapshot.backendPlayout.outputFrameAcquireMilliseconds));
outputRender.set("renderRequestMs", JsonValue(telemetrySnapshot.backendPlayout.outputFrameRenderRequestMilliseconds));
outputRender.set("endAccessMs", JsonValue(telemetrySnapshot.backendPlayout.outputFrameEndAccessMilliseconds));
outputRender.set("queueWaitMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderQueueWaitMilliseconds));
outputRender.set("drawMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderDrawMilliseconds));
outputRender.set("fenceWaitMs", JsonValue(telemetrySnapshot.backendPlayout.outputReadbackFenceWaitMilliseconds));
outputRender.set("mapMs", JsonValue(telemetrySnapshot.backendPlayout.outputReadbackMapMilliseconds));
outputRender.set("readbackCopyMs", JsonValue(telemetrySnapshot.backendPlayout.outputReadbackCopyMilliseconds));
outputRender.set("cachedCopyMs", JsonValue(telemetrySnapshot.backendPlayout.outputCachedCopyMilliseconds));
outputRender.set("asyncQueueMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueMilliseconds));
outputRender.set("asyncQueueBufferMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueBufferMilliseconds));
outputRender.set("asyncQueueSetupMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueSetupMilliseconds));
outputRender.set("asyncQueueReadPixelsMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueReadPixelsMilliseconds));
outputRender.set("asyncQueueFenceMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueFenceMilliseconds));
outputRender.set("syncReadMs", JsonValue(telemetrySnapshot.backendPlayout.outputSyncReadMilliseconds));
outputRender.set("asyncReadbackMissCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.outputAsyncReadbackMissCount)));
outputRender.set("cachedFallbackCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.outputCachedFallbackCount)));
outputRender.set("syncFallbackCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.outputSyncFallbackCount)));
JsonValue recovery = JsonValue::MakeObject(); JsonValue recovery = JsonValue::MakeObject();
recovery.set("completionResult", JsonValue(telemetrySnapshot.backendPlayout.completionResult)); recovery.set("completionResult", JsonValue(telemetrySnapshot.backendPlayout.completionResult));

View File

@@ -212,6 +212,7 @@ void HealthTelemetry::RecordBackendPlayoutHealth(const std::string& lifecycleSta
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount, std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount, uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds, double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames, uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak, uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount, uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
@@ -232,6 +233,9 @@ void HealthTelemetry::RecordBackendPlayoutHealth(const std::string& lifecycleSta
mBackendPlayout.outputRenderMilliseconds = std::max(outputRenderMilliseconds, 0.0); mBackendPlayout.outputRenderMilliseconds = std::max(outputRenderMilliseconds, 0.0);
mBackendPlayout.smoothedOutputRenderMilliseconds = std::max(smoothedOutputRenderMilliseconds, 0.0); mBackendPlayout.smoothedOutputRenderMilliseconds = std::max(smoothedOutputRenderMilliseconds, 0.0);
mBackendPlayout.maxOutputRenderMilliseconds = std::max(maxOutputRenderMilliseconds, 0.0); mBackendPlayout.maxOutputRenderMilliseconds = std::max(maxOutputRenderMilliseconds, 0.0);
mBackendPlayout.outputFrameAcquireMilliseconds = std::max(outputFrameAcquireMilliseconds, 0.0);
mBackendPlayout.outputFrameRenderRequestMilliseconds = std::max(outputFrameRenderRequestMilliseconds, 0.0);
mBackendPlayout.outputFrameEndAccessMilliseconds = std::max(outputFrameEndAccessMilliseconds, 0.0);
mBackendPlayout.completedFrameIndex = completedFrameIndex; mBackendPlayout.completedFrameIndex = completedFrameIndex;
mBackendPlayout.scheduledFrameIndex = scheduledFrameIndex; mBackendPlayout.scheduledFrameIndex = scheduledFrameIndex;
mBackendPlayout.scheduledLeadFrames = scheduledLeadFrames; mBackendPlayout.scheduledLeadFrames = scheduledLeadFrames;
@@ -251,6 +255,7 @@ bool HealthTelemetry::TryRecordBackendPlayoutHealth(const std::string& lifecycle
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount, std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount, uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds, double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames, uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak, uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount, uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
@@ -274,6 +279,9 @@ bool HealthTelemetry::TryRecordBackendPlayoutHealth(const std::string& lifecycle
mBackendPlayout.outputRenderMilliseconds = std::max(outputRenderMilliseconds, 0.0); mBackendPlayout.outputRenderMilliseconds = std::max(outputRenderMilliseconds, 0.0);
mBackendPlayout.smoothedOutputRenderMilliseconds = std::max(smoothedOutputRenderMilliseconds, 0.0); mBackendPlayout.smoothedOutputRenderMilliseconds = std::max(smoothedOutputRenderMilliseconds, 0.0);
mBackendPlayout.maxOutputRenderMilliseconds = std::max(maxOutputRenderMilliseconds, 0.0); mBackendPlayout.maxOutputRenderMilliseconds = std::max(maxOutputRenderMilliseconds, 0.0);
mBackendPlayout.outputFrameAcquireMilliseconds = std::max(outputFrameAcquireMilliseconds, 0.0);
mBackendPlayout.outputFrameRenderRequestMilliseconds = std::max(outputFrameRenderRequestMilliseconds, 0.0);
mBackendPlayout.outputFrameEndAccessMilliseconds = std::max(outputFrameEndAccessMilliseconds, 0.0);
mBackendPlayout.completedFrameIndex = completedFrameIndex; mBackendPlayout.completedFrameIndex = completedFrameIndex;
mBackendPlayout.scheduledFrameIndex = scheduledFrameIndex; mBackendPlayout.scheduledFrameIndex = scheduledFrameIndex;
mBackendPlayout.scheduledLeadFrames = scheduledLeadFrames; mBackendPlayout.scheduledLeadFrames = scheduledLeadFrames;
@@ -289,6 +297,98 @@ bool HealthTelemetry::TryRecordBackendPlayoutHealth(const std::string& lifecycle
return true; return true;
} }
// Stores the most recent output-render queue wait, in milliseconds, in the
// backend playout snapshot. Negative inputs are recorded as 0. Waits for the
// telemetry mutex; the Try* variant is the non-waiting counterpart.
void HealthTelemetry::RecordOutputRenderQueueWait(double queueWaitMilliseconds)
{
    // Clamp before taking the lock so the guarded section is a single store.
    const double nonNegativeWait = std::max(queueWaitMilliseconds, 0.0);

    std::lock_guard<std::mutex> guard(mMutex);
    mBackendPlayout.outputRenderQueueWaitMilliseconds = nonNegativeWait;
}
// Non-waiting variant of RecordOutputRenderQueueWait.
// Returns true when the telemetry mutex could be taken immediately and the
// wait (clamped to be non-negative) was stored; returns false, recording
// nothing, when the mutex is currently held elsewhere.
bool HealthTelemetry::TryRecordOutputRenderQueueWait(double queueWaitMilliseconds)
{
    std::unique_lock<std::mutex> guard(mMutex, std::try_to_lock);
    if (guard.owns_lock())
    {
        mBackendPlayout.outputRenderQueueWaitMilliseconds = std::max(queueWaitMilliseconds, 0.0);
        return true;
    }

    // Mutex is busy: drop this sample rather than wait for it.
    return false;
}
// Records one sample of output render pipeline timings in the backend playout record.
//
// Every *Milliseconds argument overwrites the matching mBackendPlayout field, with
// negative inputs stored as 0.0. The three flags do not overwrite anything: each one
// that is true increments its corresponding counter (async readback miss, cached
// fallback, sync fallback) by one. Blocks until mMutex has been acquired.
void HealthTelemetry::RecordOutputRenderPipelineTiming(
    double drawMilliseconds,
    double fenceWaitMilliseconds,
    double mapMilliseconds,
    double readbackCopyMilliseconds,
    double cachedCopyMilliseconds,
    double asyncQueueMilliseconds,
    double asyncQueueBufferMilliseconds,
    double asyncQueueSetupMilliseconds,
    double asyncQueueReadPixelsMilliseconds,
    double asyncQueueFenceMilliseconds,
    double syncReadMilliseconds,
    bool asyncReadbackMissed,
    bool cachedFallbackUsed,
    bool syncFallbackUsed)
{
    const auto nonNegative = [](double milliseconds) { return std::max(milliseconds, 0.0); };

    std::lock_guard<std::mutex> guard(mMutex);
    auto& playout = mBackendPlayout;

    // Latest-sample timings.
    playout.outputRenderDrawMilliseconds = nonNegative(drawMilliseconds);
    playout.outputReadbackFenceWaitMilliseconds = nonNegative(fenceWaitMilliseconds);
    playout.outputReadbackMapMilliseconds = nonNegative(mapMilliseconds);
    playout.outputReadbackCopyMilliseconds = nonNegative(readbackCopyMilliseconds);
    playout.outputCachedCopyMilliseconds = nonNegative(cachedCopyMilliseconds);
    playout.outputAsyncQueueMilliseconds = nonNegative(asyncQueueMilliseconds);
    playout.outputAsyncQueueBufferMilliseconds = nonNegative(asyncQueueBufferMilliseconds);
    playout.outputAsyncQueueSetupMilliseconds = nonNegative(asyncQueueSetupMilliseconds);
    playout.outputAsyncQueueReadPixelsMilliseconds = nonNegative(asyncQueueReadPixelsMilliseconds);
    playout.outputAsyncQueueFenceMilliseconds = nonNegative(asyncQueueFenceMilliseconds);
    playout.outputSyncReadMilliseconds = nonNegative(syncReadMilliseconds);

    // Running event counters.
    playout.outputAsyncReadbackMissCount += asyncReadbackMissed ? 1u : 0u;
    playout.outputCachedFallbackCount += cachedFallbackUsed ? 1u : 0u;
    playout.outputSyncFallbackCount += syncFallbackUsed ? 1u : 0u;
}
// Non-blocking variant of RecordOutputRenderPipelineTiming.
//
// Attempts to take mMutex without waiting. When the lock is unavailable nothing is
// written and false is returned. Otherwise every *Milliseconds argument overwrites
// the matching mBackendPlayout field (negative inputs stored as 0.0), each flag that
// is true increments its corresponding counter by one, and true is returned.
bool HealthTelemetry::TryRecordOutputRenderPipelineTiming(
    double drawMilliseconds,
    double fenceWaitMilliseconds,
    double mapMilliseconds,
    double readbackCopyMilliseconds,
    double cachedCopyMilliseconds,
    double asyncQueueMilliseconds,
    double asyncQueueBufferMilliseconds,
    double asyncQueueSetupMilliseconds,
    double asyncQueueReadPixelsMilliseconds,
    double asyncQueueFenceMilliseconds,
    double syncReadMilliseconds,
    bool asyncReadbackMissed,
    bool cachedFallbackUsed,
    bool syncFallbackUsed)
{
    std::unique_lock<std::mutex> guard(mMutex, std::try_to_lock);
    if (!guard.owns_lock())
        return false;

    const auto nonNegative = [](double milliseconds) { return std::max(milliseconds, 0.0); };
    auto& playout = mBackendPlayout;

    // Latest-sample timings.
    playout.outputRenderDrawMilliseconds = nonNegative(drawMilliseconds);
    playout.outputReadbackFenceWaitMilliseconds = nonNegative(fenceWaitMilliseconds);
    playout.outputReadbackMapMilliseconds = nonNegative(mapMilliseconds);
    playout.outputReadbackCopyMilliseconds = nonNegative(readbackCopyMilliseconds);
    playout.outputCachedCopyMilliseconds = nonNegative(cachedCopyMilliseconds);
    playout.outputAsyncQueueMilliseconds = nonNegative(asyncQueueMilliseconds);
    playout.outputAsyncQueueBufferMilliseconds = nonNegative(asyncQueueBufferMilliseconds);
    playout.outputAsyncQueueSetupMilliseconds = nonNegative(asyncQueueSetupMilliseconds);
    playout.outputAsyncQueueReadPixelsMilliseconds = nonNegative(asyncQueueReadPixelsMilliseconds);
    playout.outputAsyncQueueFenceMilliseconds = nonNegative(asyncQueueFenceMilliseconds);
    playout.outputSyncReadMilliseconds = nonNegative(syncReadMilliseconds);

    // Running event counters.
    playout.outputAsyncReadbackMissCount += asyncReadbackMissed ? 1u : 0u;
    playout.outputCachedFallbackCount += cachedFallbackUsed ? 1u : 0u;
    playout.outputSyncFallbackCount += syncFallbackUsed ? 1u : 0u;
    return true;
}
HealthTelemetry::SignalStatusSnapshot HealthTelemetry::GetSignalStatusSnapshot() const HealthTelemetry::SignalStatusSnapshot HealthTelemetry::GetSignalStatusSnapshot() const
{ {
std::lock_guard<std::mutex> lock(mMutex); std::lock_guard<std::mutex> lock(mMutex);

View File

@@ -97,6 +97,24 @@ public:
double outputRenderMilliseconds = 0.0; double outputRenderMilliseconds = 0.0;
double smoothedOutputRenderMilliseconds = 0.0; double smoothedOutputRenderMilliseconds = 0.0;
double maxOutputRenderMilliseconds = 0.0; double maxOutputRenderMilliseconds = 0.0;
double outputFrameAcquireMilliseconds = 0.0;
double outputFrameRenderRequestMilliseconds = 0.0;
double outputFrameEndAccessMilliseconds = 0.0;
double outputRenderQueueWaitMilliseconds = 0.0;
double outputRenderDrawMilliseconds = 0.0;
double outputReadbackFenceWaitMilliseconds = 0.0;
double outputReadbackMapMilliseconds = 0.0;
double outputReadbackCopyMilliseconds = 0.0;
double outputCachedCopyMilliseconds = 0.0;
double outputAsyncQueueMilliseconds = 0.0;
double outputAsyncQueueBufferMilliseconds = 0.0;
double outputAsyncQueueSetupMilliseconds = 0.0;
double outputAsyncQueueReadPixelsMilliseconds = 0.0;
double outputAsyncQueueFenceMilliseconds = 0.0;
double outputSyncReadMilliseconds = 0.0;
uint64_t outputAsyncReadbackMissCount = 0;
uint64_t outputCachedFallbackCount = 0;
uint64_t outputSyncFallbackCount = 0;
uint64_t completedFrameIndex = 0; uint64_t completedFrameIndex = 0;
uint64_t scheduledFrameIndex = 0; uint64_t scheduledFrameIndex = 0;
uint64_t scheduledLeadFrames = 0; uint64_t scheduledLeadFrames = 0;
@@ -161,6 +179,7 @@ public:
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount, std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount, uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds, double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames, uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak, uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount, uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
@@ -170,11 +189,46 @@ public:
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount, std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount, uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds, double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames, uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak, uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount, uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
bool degraded, const std::string& statusMessage); bool degraded, const std::string& statusMessage);
void RecordOutputRenderQueueWait(double queueWaitMilliseconds);
bool TryRecordOutputRenderQueueWait(double queueWaitMilliseconds);
void RecordOutputRenderPipelineTiming(
double drawMilliseconds,
double fenceWaitMilliseconds,
double mapMilliseconds,
double readbackCopyMilliseconds,
double cachedCopyMilliseconds,
double asyncQueueMilliseconds,
double asyncQueueBufferMilliseconds,
double asyncQueueSetupMilliseconds,
double asyncQueueReadPixelsMilliseconds,
double asyncQueueFenceMilliseconds,
double syncReadMilliseconds,
bool asyncReadbackMissed,
bool cachedFallbackUsed,
bool syncFallbackUsed);
bool TryRecordOutputRenderPipelineTiming(
double drawMilliseconds,
double fenceWaitMilliseconds,
double mapMilliseconds,
double readbackCopyMilliseconds,
double cachedCopyMilliseconds,
double asyncQueueMilliseconds,
double asyncQueueBufferMilliseconds,
double asyncQueueSetupMilliseconds,
double asyncQueueReadPixelsMilliseconds,
double asyncQueueFenceMilliseconds,
double syncReadMilliseconds,
bool asyncReadbackMissed,
bool cachedFallbackUsed,
bool syncFallbackUsed);
SignalStatusSnapshot GetSignalStatusSnapshot() const; SignalStatusSnapshot GetSignalStatusSnapshot() const;
VideoIOStatusSnapshot GetVideoIOStatusSnapshot() const; VideoIOStatusSnapshot GetVideoIOStatusSnapshot() const;
PerformanceSnapshot GetPerformanceSnapshot() const; PerformanceSnapshot GetPerformanceSnapshot() const;

View File

@@ -9,12 +9,14 @@
#include <algorithm> #include <algorithm>
#include <chrono> #include <chrono>
#include <cstring> #include <cstring>
#include <cmath>
#include <windows.h> #include <windows.h>
VideoBackend::VideoBackend(RenderEngine& renderEngine, HealthTelemetry& healthTelemetry, RuntimeEventDispatcher& runtimeEventDispatcher) : VideoBackend::VideoBackend(RenderEngine& renderEngine, HealthTelemetry& healthTelemetry, RuntimeEventDispatcher& runtimeEventDispatcher) :
mHealthTelemetry(healthTelemetry), mHealthTelemetry(healthTelemetry),
mRuntimeEventDispatcher(runtimeEventDispatcher), mRuntimeEventDispatcher(runtimeEventDispatcher),
mPlayoutPolicy(NormalizeVideoPlayoutPolicy(VideoPlayoutPolicy())), mPlayoutPolicy(NormalizeVideoPlayoutPolicy(VideoPlayoutPolicy())),
mOutputProductionController(mPlayoutPolicy),
mReadyOutputQueue(mPlayoutPolicy), mReadyOutputQueue(mPlayoutPolicy),
mVideoIODevice(std::make_unique<DeckLinkSession>()), mVideoIODevice(std::make_unique<DeckLinkSession>()),
mBridge(std::make_unique<OpenGLVideoIOBridge>(renderEngine)) mBridge(std::make_unique<OpenGLVideoIOBridge>(renderEngine))
@@ -80,6 +82,7 @@ bool VideoBackend::ConfigureInput(const VideoFormat& inputVideoMode, std::string
bool VideoBackend::ConfigureOutput(const VideoFormat& outputVideoMode, bool externalKeyingEnabled, std::string& error) bool VideoBackend::ConfigureOutput(const VideoFormat& outputVideoMode, bool externalKeyingEnabled, std::string& error)
{ {
mPlayoutPolicy = NormalizeVideoPlayoutPolicy(mPlayoutPolicy); mPlayoutPolicy = NormalizeVideoPlayoutPolicy(mPlayoutPolicy);
mOutputProductionController.Configure(mPlayoutPolicy);
mReadyOutputQueue.Configure(mPlayoutPolicy); mReadyOutputQueue.Configure(mPlayoutPolicy);
if (mLifecycle.State() != VideoBackendLifecycleState::Configuring) if (mLifecycle.State() != VideoBackendLifecycleState::Configuring)
ApplyLifecycleTransition(VideoBackendLifecycleState::Configuring, "Configuring video backend output."); ApplyLifecycleTransition(VideoBackendLifecycleState::Configuring, "Configuring video backend output.");
@@ -101,7 +104,10 @@ bool VideoBackend::Start()
StartOutputCompletionWorker(); StartOutputCompletionWorker();
const bool started = mVideoIODevice->Start(); const bool started = mVideoIODevice->Start();
if (started) if (started)
{
StartOutputProducerWorker();
ApplyLifecycleTransition(VideoBackendLifecycleState::Running, "Video backend started."); ApplyLifecycleTransition(VideoBackendLifecycleState::Running, "Video backend started.");
}
else else
{ {
StopOutputCompletionWorker(); StopOutputCompletionWorker();
@@ -113,6 +119,7 @@ bool VideoBackend::Start()
bool VideoBackend::Stop() bool VideoBackend::Stop()
{ {
ApplyLifecycleTransition(VideoBackendLifecycleState::Stopping, "Video backend stopping."); ApplyLifecycleTransition(VideoBackendLifecycleState::Stopping, "Video backend stopping.");
StopOutputProducerWorker();
const bool stopped = mVideoIODevice->Stop(); const bool stopped = mVideoIODevice->Stop();
StopOutputCompletionWorker(); StopOutputCompletionWorker();
if (stopped) if (stopped)
@@ -232,6 +239,12 @@ const std::string& VideoBackend::StatusMessage() const
return mVideoIODevice->StatusMessage(); return mVideoIODevice->StatusMessage();
} }
bool VideoBackend::ShouldPrioritizeOutputOverPreview() const
{
const RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
return metrics.depth < static_cast<std::size_t>(mPlayoutPolicy.targetReadyFrames);
}
void VideoBackend::SetStatusMessage(const std::string& message) void VideoBackend::SetStatusMessage(const std::string& message)
{ {
mVideoIODevice->SetStatusMessage(message); mVideoIODevice->SetStatusMessage(message);
@@ -288,27 +301,36 @@ void VideoBackend::HandleOutputFrameCompletion(const VideoIOCompletion& completi
// Starts the output completion worker thread.
// Under mOutputCompletionMutex: returns without doing anything when
// mOutputCompletionWorkerRunning is already set; otherwise clears the pending
// completions and the ready output queue, zeroes the frame index, queue-depth and
// render-timing statistics, clears the stopping flag, marks the worker as running and
// launches OutputCompletionWorkerMain on mOutputCompletionWorker.
// NOTE(review): this span is a side-by-side diff rendering. Rows that hold two
// statements appear to be the removed text followed by the added text, and rows with a
// single statement appear to be additions - confirm against the real source file.
void VideoBackend::StartOutputCompletionWorker() void VideoBackend::StartOutputCompletionWorker()
{ {
std::lock_guard<std::mutex> lock(mOutputCompletionMutex); {
if (mOutputCompletionWorkerRunning) std::lock_guard<std::mutex> lock(mOutputCompletionMutex);
return; if (mOutputCompletionWorkerRunning)
return;
mPendingOutputCompletions.clear(); mPendingOutputCompletions.clear();
mReadyOutputQueue.Clear(); mReadyOutputQueue.Clear();
mNextReadyOutputFrameIndex = 0; mNextReadyOutputFrameIndex = 0;
mHasReadyQueueDepthBaseline = false; mHasReadyQueueDepthBaseline = false;
mMinReadyQueueDepth = 0; mMinReadyQueueDepth = 0;
mMaxReadyQueueDepth = 0; mMaxReadyQueueDepth = 0;
mReadyQueueZeroDepthCount = 0; mReadyQueueZeroDepthCount = 0;
mOutputRenderMilliseconds = 0.0; mOutputRenderMilliseconds = 0.0;
mSmoothedOutputRenderMilliseconds = 0.0; mSmoothedOutputRenderMilliseconds = 0.0;
mMaxOutputRenderMilliseconds = 0.0; mMaxOutputRenderMilliseconds = 0.0;
mOutputCompletionWorkerStopping = false; mOutputFrameAcquireMilliseconds = 0.0;
mOutputCompletionWorkerRunning = true; mOutputFrameRenderRequestMilliseconds = 0.0;
mOutputCompletionWorker = std::thread(&VideoBackend::OutputCompletionWorkerMain, this); mOutputFrameEndAccessMilliseconds = 0.0;
// The added rows below also reset the per-stage frame timings and the last late/drop streaks.
mLastLateStreak = 0;
mLastDropStreak = 0;
mOutputCompletionWorkerStopping = false;
mOutputCompletionWorkerRunning = true;
mOutputCompletionWorker = std::thread(&VideoBackend::OutputCompletionWorkerMain, this);
}
}
void VideoBackend::StopOutputCompletionWorker() void VideoBackend::StopOutputCompletionWorker()
{ {
StopOutputProducerWorker();
bool shouldJoin = false; bool shouldJoin = false;
{ {
std::lock_guard<std::mutex> lock(mOutputCompletionMutex); std::lock_guard<std::mutex> lock(mOutputCompletionMutex);
@@ -322,6 +344,40 @@ void VideoBackend::StopOutputCompletionWorker()
mOutputCompletionWorker.join(); mOutputCompletionWorker.join();
} }
// Launches the output producer worker thread unless it is already marked as running.
// Under mOutputProducerMutex this resets the remembered completion and the
// last-production timestamp to default-constructed values, clears the stopping flag,
// marks the worker as running, starts OutputProducerWorkerMain on
// mOutputProducerWorker and signals mOutputProducerCondition once.
void VideoBackend::StartOutputProducerWorker()
{
    std::lock_guard<std::mutex> guard(mOutputProducerMutex);
    if (!mOutputProducerWorkerRunning)
    {
        mLastOutputProductionCompletion = VideoIOCompletion();
        mLastOutputProductionTime = std::chrono::steady_clock::time_point();

        mOutputProducerWorkerStopping = false;
        mOutputProducerWorkerRunning = true;
        mOutputProducerWorker = std::thread(&VideoBackend::OutputProducerWorkerMain, this);
        mOutputProducerCondition.notify_one();
    }
}
// Requests the output producer worker to stop and waits for its thread to finish.
// The stopping flag is only raised when the worker is marked as running. The
// condition variable is always signalled, and the thread is joined only when it was
// joinable at the time the lock was held. The join happens outside the lock so the
// worker can take mOutputProducerMutex on its way out.
void VideoBackend::StopOutputProducerWorker()
{
    bool workerJoinable = false;
    {
        std::lock_guard<std::mutex> guard(mOutputProducerMutex);
        mOutputProducerWorkerStopping = mOutputProducerWorkerStopping || mOutputProducerWorkerRunning;
        workerJoinable = mOutputProducerWorker.joinable();
    }

    mOutputProducerCondition.notify_one();
    if (!workerJoinable)
        return;
    mOutputProducerWorker.join();
}
// Signals mOutputProducerCondition once, waking one thread that is currently waiting
// on it. No mutex is taken and no state is changed here.
void VideoBackend::NotifyOutputProducer()
{
mOutputProducerCondition.notify_one();
}
void VideoBackend::OutputCompletionWorkerMain() void VideoBackend::OutputCompletionWorkerMain()
{ {
for (;;) for (;;)
@@ -351,6 +407,57 @@ void VideoBackend::OutputCompletionWorkerMain()
} }
} }
// Thread body of the output producer worker.
//
// Each pass of the loop:
//   1. sleeps on mOutputProducerCondition for at most OutputProducerWakeInterval()
//      (a notification can end the sleep early);
//   2. exits, clearing mOutputProducerWorkerRunning, once mOutputProducerWorkerStopping is set;
//   3. samples the ready-queue metrics and asks mOutputProductionController whether to produce;
//   4. if production is requested and the last successful production is at least one
//      wake interval old, renders at most one frame using the completion stored in
//      mLastOutputProductionCompletion.
void VideoBackend::OutputProducerWorkerMain()
{
    for (;;)
    {
        {
            std::unique_lock<std::mutex> lock(mOutputProducerMutex);
            // A stop request can be raised while this thread is outside the wait (for
            // example while it is rendering), in which case its notification is missed.
            // Checking the flag before sleeping lets shutdown proceed immediately instead
            // of first sitting out a full wake interval.
            if (!mOutputProducerWorkerStopping)
                mOutputProducerCondition.wait_for(lock, OutputProducerWakeInterval());
            if (mOutputProducerWorkerStopping)
            {
                mOutputProducerWorkerRunning = false;
                return;
            }
        }

        const RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
        RecordReadyQueueDepthSample(metrics);
        const OutputProductionDecision decision = mOutputProductionController.Decide(BuildOutputProductionPressure(metrics));
        if (decision.action != OutputProductionAction::Produce || decision.requestedFrames == 0)
            continue;

        VideoIOCompletion completion;
        {
            std::lock_guard<std::mutex> lock(mOutputProducerMutex);
            // The check at the top of the loop performs the actual exit.
            if (mOutputProducerWorkerStopping)
                continue;
            completion = mLastOutputProductionCompletion;
        }

        // Throttle: skip this pass when the previous successful production happened less
        // than one wake interval ago. A default-constructed timestamp means "never produced".
        const auto now = std::chrono::steady_clock::now();
        if (mLastOutputProductionTime != std::chrono::steady_clock::time_point() &&
            now - mLastOutputProductionTime < OutputProducerWakeInterval())
        {
            continue;
        }

        if (ProduceReadyOutputFrames(completion, 1) > 0)
            mLastOutputProductionTime = std::chrono::steady_clock::now();
    }
}
// Computes how long the producer worker sleeps between passes.
// Returns 8 ms when State().frameBudgetMilliseconds is zero or negative; otherwise
// returns 75% of the frame budget rounded down to whole milliseconds, never less than 1 ms.
std::chrono::milliseconds VideoBackend::OutputProducerWakeInterval() const
{
    const double budgetMilliseconds = State().frameBudgetMilliseconds;
    if (budgetMilliseconds <= 0.0)
        return std::chrono::milliseconds(8);

    const int scaledMilliseconds = static_cast<int>(std::floor(budgetMilliseconds * 0.75));
    return std::chrono::milliseconds(scaledMilliseconds < 1 ? 1 : scaledMilliseconds);
}
void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& completion) void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& completion)
{ {
RecordFramePacing(completion.result); RecordFramePacing(completion.result);
@@ -358,31 +465,69 @@ void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& complet
const RenderOutputQueueMetrics initialQueueMetrics = mReadyOutputQueue.GetMetrics(); const RenderOutputQueueMetrics initialQueueMetrics = mReadyOutputQueue.GetMetrics();
RecordReadyQueueDepthSample(initialQueueMetrics); RecordReadyQueueDepthSample(initialQueueMetrics);
const VideoPlayoutRecoveryDecision recoveryDecision = AccountForCompletionResult(completion.result, initialQueueMetrics.depth); const VideoPlayoutRecoveryDecision recoveryDecision = AccountForCompletionResult(completion.result, initialQueueMetrics.depth);
{
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
mLastLateStreak = recoveryDecision.lateStreak;
mLastDropStreak = recoveryDecision.dropStreak;
}
{
std::lock_guard<std::mutex> lock(mOutputProducerMutex);
mLastOutputProductionCompletion = completion;
}
NotifyOutputProducer();
FillReadyOutputQueue(completion); if (!ScheduleReadyOutputFrame() &&
if (!ScheduleReadyOutputFrame()) (ProduceReadyOutputFrames(completion, 1) == 0 || !ScheduleReadyOutputFrame()))
{
ScheduleBlackUnderrunFrame(); ScheduleBlackUnderrunFrame();
}
NotifyOutputProducer();
RecordBackendPlayoutHealth(completion.result, recoveryDecision); RecordBackendPlayoutHealth(completion.result, recoveryDecision);
} }
void VideoBackend::RecordBackendPlayoutHealth(VideoIOCompletionResult result, const VideoPlayoutRecoveryDecision& recoveryDecision) void VideoBackend::RecordBackendPlayoutHealth(VideoIOCompletionResult result, const VideoPlayoutRecoveryDecision& recoveryDecision)
{ {
const RenderOutputQueueMetrics queueMetrics = mReadyOutputQueue.GetMetrics(); const RenderOutputQueueMetrics queueMetrics = mReadyOutputQueue.GetMetrics();
std::size_t minReadyQueueDepth = 0;
std::size_t maxReadyQueueDepth = 0;
uint64_t readyQueueZeroDepthCount = 0;
double outputRenderMilliseconds = 0.0;
double smoothedOutputRenderMilliseconds = 0.0;
double maxOutputRenderMilliseconds = 0.0;
double outputFrameAcquireMilliseconds = 0.0;
double outputFrameRenderRequestMilliseconds = 0.0;
double outputFrameEndAccessMilliseconds = 0.0;
{
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
minReadyQueueDepth = mMinReadyQueueDepth;
maxReadyQueueDepth = mMaxReadyQueueDepth;
readyQueueZeroDepthCount = mReadyQueueZeroDepthCount;
outputRenderMilliseconds = mOutputRenderMilliseconds;
smoothedOutputRenderMilliseconds = mSmoothedOutputRenderMilliseconds;
maxOutputRenderMilliseconds = mMaxOutputRenderMilliseconds;
outputFrameAcquireMilliseconds = mOutputFrameAcquireMilliseconds;
outputFrameRenderRequestMilliseconds = mOutputFrameRenderRequestMilliseconds;
outputFrameEndAccessMilliseconds = mOutputFrameEndAccessMilliseconds;
}
mHealthTelemetry.TryRecordBackendPlayoutHealth( mHealthTelemetry.TryRecordBackendPlayoutHealth(
VideoBackendLifecycle::StateName(mLifecycle.State()), VideoBackendLifecycle::StateName(mLifecycle.State()),
CompletionResultName(result), CompletionResultName(result),
queueMetrics.depth, queueMetrics.depth,
queueMetrics.capacity, queueMetrics.capacity,
queueMetrics.pushedCount, queueMetrics.pushedCount,
mMinReadyQueueDepth, minReadyQueueDepth,
mMaxReadyQueueDepth, maxReadyQueueDepth,
mReadyQueueZeroDepthCount, readyQueueZeroDepthCount,
queueMetrics.poppedCount, queueMetrics.poppedCount,
queueMetrics.droppedCount, queueMetrics.droppedCount,
queueMetrics.underrunCount, queueMetrics.underrunCount,
mOutputRenderMilliseconds, outputRenderMilliseconds,
mSmoothedOutputRenderMilliseconds, smoothedOutputRenderMilliseconds,
mMaxOutputRenderMilliseconds, maxOutputRenderMilliseconds,
outputFrameAcquireMilliseconds,
outputFrameRenderRequestMilliseconds,
outputFrameEndAccessMilliseconds,
recoveryDecision.completedFrameIndex, recoveryDecision.completedFrameIndex,
recoveryDecision.scheduledFrameIndex, recoveryDecision.scheduledFrameIndex,
recoveryDecision.scheduledLeadFrames, recoveryDecision.scheduledLeadFrames,
@@ -397,50 +542,93 @@ void VideoBackend::RecordBackendPlayoutHealth(VideoIOCompletionResult result, co
StatusMessage()); StatusMessage());
} }
// Renders frames into the ready output queue and reports how many were produced.
// NOTE(review): this span is a side-by-side diff rendering. Rows that hold two
// statements appear to be the removed FillReadyOutputQueue text followed by the added
// ProduceReadyOutputFrames text, and rows with a single statement appear to be
// additions - confirm against the real source file.
// Added version, as far as the rows show: returns 0 straight away when maxFrames is 0;
// otherwise holds mOutputProductionMutex and loops until maxFrames frames have been
// produced, the production controller stops answering Produce, or
// RenderReadyOutputFrame fails. The ready-queue metrics are re-read and sampled after
// every produced frame, and the number of frames produced is returned.
bool VideoBackend::FillReadyOutputQueue(const VideoIOCompletion& completion) std::size_t VideoBackend::ProduceReadyOutputFrames(const VideoIOCompletion& completion, std::size_t maxFrames)
{ {
if (maxFrames == 0)
return 0;
std::lock_guard<std::mutex> productionLock(mOutputProductionMutex);
RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics(); RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
bool filledAny = false; std::size_t producedFrames = 0;
while (metrics.depth < mPlayoutPolicy.targetReadyFrames) while (producedFrames < maxFrames)
{ {
const OutputProductionDecision decision = mOutputProductionController.Decide(BuildOutputProductionPressure(metrics));
if (decision.action != OutputProductionAction::Produce)
break;
if (!RenderReadyOutputFrame(mVideoIODevice->State(), completion)) if (!RenderReadyOutputFrame(mVideoIODevice->State(), completion))
return filledAny; break;
filledAny = true; ++producedFrames;
metrics = mReadyOutputQueue.GetMetrics(); metrics = mReadyOutputQueue.GetMetrics();
RecordReadyQueueDepthSample(metrics); RecordReadyQueueDepthSample(metrics);
} }
return true; return producedFrames;
}
// Assembles the input handed to the output production controller.
// Queue depth, capacity and underrun count are copied from the supplied metrics; the
// late and drop streaks are read from the last recorded values while holding
// mOutputMetricsMutex.
OutputProductionPressure VideoBackend::BuildOutputProductionPressure(const RenderOutputQueueMetrics& metrics) const
{
    OutputProductionPressure result;
    {
        std::lock_guard<std::mutex> guard(mOutputMetricsMutex);
        result.lateStreak = mLastLateStreak;
        result.dropStreak = mLastDropStreak;
    }

    result.readyQueueDepth = metrics.depth;
    result.readyQueueCapacity = metrics.capacity;
    result.readyQueueUnderrunCount = metrics.underrunCount;
    return result;
}
// Acquires an output frame, asks the bridge to render into it, ends access to it and
// pushes it onto the ready output queue. Returns false when the frame cannot be
// acquired, when the render request fails, or when the queue push fails.
// NOTE(review): this span is a side-by-side diff rendering. Rows that hold two
// statements appear to be the removed text followed by the added text, and rows with a
// single statement appear to be additions - confirm against the real source file.
bool VideoBackend::RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion) bool VideoBackend::RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion)
{ {
const auto renderStart = std::chrono::steady_clock::now(); const auto renderStart = std::chrono::steady_clock::now();
VideoIOOutputFrame outputFrame; VideoIOOutputFrame outputFrame;
const auto acquireStart = std::chrono::steady_clock::now();
if (!BeginOutputFrame(outputFrame)) if (!BeginOutputFrame(outputFrame))
return false; return false;
const auto acquireEnd = std::chrono::steady_clock::now();
bool rendered = true; bool rendered = true;
const auto renderRequestStart = std::chrono::steady_clock::now();
if (mBridge) if (mBridge)
rendered = mBridge->RenderScheduledFrame(state, completion, outputFrame); rendered = mBridge->RenderScheduledFrame(state, completion, outputFrame);
const auto renderRequestEnd = std::chrono::steady_clock::now();
const auto endAccessStart = std::chrono::steady_clock::now();
EndOutputFrame(outputFrame); EndOutputFrame(outputFrame);
const auto endAccessEnd = std::chrono::steady_clock::now();
// Per-stage durations (acquire, render request, end access) in fractional milliseconds.
const double acquireMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(acquireEnd - acquireStart).count();
const double renderRequestMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(renderRequestEnd - renderRequestStart).count();
const double endAccessMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(endAccessEnd - endAccessStart).count();
if (!rendered) if (!rendered)
{ {
ApplyLifecycleTransition(VideoBackendLifecycleState::Degraded, "Output frame render request failed; skipping schedule for this frame."); ApplyLifecycleTransition(VideoBackendLifecycleState::Degraded, "Output frame render request failed; skipping schedule for this frame.");
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>( const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
std::chrono::steady_clock::now() - renderStart).count(); std::chrono::steady_clock::now() - renderStart).count();
RecordOutputRenderDuration(renderMilliseconds); RecordOutputRenderDuration(renderMilliseconds, acquireMilliseconds, renderRequestMilliseconds, endAccessMilliseconds);
// The frame will not be queued, so its native frame reference is released here.
if (outputFrame.nativeFrame != nullptr)
{
static_cast<IUnknown*>(outputFrame.nativeFrame)->Release();
outputFrame.nativeFrame = nullptr;
}
return false; return false;
} }
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>( const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
std::chrono::steady_clock::now() - renderStart).count(); std::chrono::steady_clock::now() - renderStart).count();
RecordOutputRenderDuration(renderMilliseconds); RecordOutputRenderDuration(renderMilliseconds, acquireMilliseconds, renderRequestMilliseconds, endAccessMilliseconds);
RenderOutputFrame readyFrame; RenderOutputFrame readyFrame;
readyFrame.frame = outputFrame; readyFrame.frame = outputFrame;
readyFrame.frameIndex = ++mNextReadyOutputFrameIndex; readyFrame.frameIndex = ++mNextReadyOutputFrameIndex;
return mReadyOutputQueue.Push(readyFrame); const bool pushed = mReadyOutputQueue.Push(readyFrame);
// A frame the queue did not accept has its native frame reference released as well.
if (!pushed && outputFrame.nativeFrame != nullptr)
{
static_cast<IUnknown*>(outputFrame.nativeFrame)->Release();
outputFrame.nativeFrame = nullptr;
}
return pushed;
} }
bool VideoBackend::ScheduleReadyOutputFrame() bool VideoBackend::ScheduleReadyOutputFrame()
@@ -516,6 +704,7 @@ void VideoBackend::RecordFramePacing(VideoIOCompletionResult completionResult)
void VideoBackend::RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics) void VideoBackend::RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics)
{ {
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
if (!mHasReadyQueueDepthBaseline) if (!mHasReadyQueueDepthBaseline)
{ {
mHasReadyQueueDepthBaseline = true; mHasReadyQueueDepthBaseline = true;
@@ -532,14 +721,18 @@ void VideoBackend::RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& m
++mReadyQueueZeroDepthCount; ++mReadyQueueZeroDepthCount;
} }
void VideoBackend::RecordOutputRenderDuration(double renderMilliseconds) void VideoBackend::RecordOutputRenderDuration(double renderMilliseconds, double acquireMilliseconds, double renderRequestMilliseconds, double endAccessMilliseconds)
{ {
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
mOutputRenderMilliseconds = (std::max)(renderMilliseconds, 0.0); mOutputRenderMilliseconds = (std::max)(renderMilliseconds, 0.0);
if (mSmoothedOutputRenderMilliseconds <= 0.0) if (mSmoothedOutputRenderMilliseconds <= 0.0)
mSmoothedOutputRenderMilliseconds = mOutputRenderMilliseconds; mSmoothedOutputRenderMilliseconds = mOutputRenderMilliseconds;
else else
mSmoothedOutputRenderMilliseconds = mSmoothedOutputRenderMilliseconds * 0.9 + mOutputRenderMilliseconds * 0.1; mSmoothedOutputRenderMilliseconds = mSmoothedOutputRenderMilliseconds * 0.9 + mOutputRenderMilliseconds * 0.1;
mMaxOutputRenderMilliseconds = (std::max)(mMaxOutputRenderMilliseconds, mOutputRenderMilliseconds); mMaxOutputRenderMilliseconds = (std::max)(mMaxOutputRenderMilliseconds, mOutputRenderMilliseconds);
mOutputFrameAcquireMilliseconds = (std::max)(acquireMilliseconds, 0.0);
mOutputFrameRenderRequestMilliseconds = (std::max)(renderRequestMilliseconds, 0.0);
mOutputFrameEndAccessMilliseconds = (std::max)(endAccessMilliseconds, 0.0);
PublishTimingSample("VideoBackend", "outputRender", mOutputRenderMilliseconds, "ms"); PublishTimingSample("VideoBackend", "outputRender", mOutputRenderMilliseconds, "ms");
PublishTimingSample("VideoBackend", "smoothedOutputRender", mSmoothedOutputRenderMilliseconds, "ms"); PublishTimingSample("VideoBackend", "smoothedOutputRender", mSmoothedOutputRenderMilliseconds, "ms");

View File

@@ -1,5 +1,6 @@
#pragma once #pragma once
#include "OutputProductionController.h"
#include "RenderOutputQueue.h" #include "RenderOutputQueue.h"
#include "VideoBackendLifecycle.h" #include "VideoBackendLifecycle.h"
#include "VideoIOTypes.h" #include "VideoIOTypes.h"
@@ -59,6 +60,7 @@ public:
bool KeyerInterfaceAvailable() const; bool KeyerInterfaceAvailable() const;
bool ExternalKeyingActive() const; bool ExternalKeyingActive() const;
const std::string& StatusMessage() const; const std::string& StatusMessage() const;
bool ShouldPrioritizeOutputOverPreview() const;
void SetStatusMessage(const std::string& message); void SetStatusMessage(const std::string& message);
void PublishStatus(bool externalKeyingConfigured, const std::string& statusMessage = std::string()); void PublishStatus(bool externalKeyingConfigured, const std::string& statusMessage = std::string());
void ReportNoInputDeviceSignalStatus(); void ReportNoInputDeviceSignalStatus();
@@ -69,14 +71,20 @@ private:
void StartOutputCompletionWorker(); void StartOutputCompletionWorker();
void StopOutputCompletionWorker(); void StopOutputCompletionWorker();
void OutputCompletionWorkerMain(); void OutputCompletionWorkerMain();
void StartOutputProducerWorker();
void StopOutputProducerWorker();
void OutputProducerWorkerMain();
void NotifyOutputProducer();
std::chrono::milliseconds OutputProducerWakeInterval() const;
void ProcessOutputFrameCompletion(const VideoIOCompletion& completion); void ProcessOutputFrameCompletion(const VideoIOCompletion& completion);
bool FillReadyOutputQueue(const VideoIOCompletion& completion); std::size_t ProduceReadyOutputFrames(const VideoIOCompletion& completion, std::size_t maxFrames);
OutputProductionPressure BuildOutputProductionPressure(const RenderOutputQueueMetrics& metrics) const;
bool RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion); bool RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion);
bool ScheduleReadyOutputFrame(); bool ScheduleReadyOutputFrame();
bool ScheduleBlackUnderrunFrame(); bool ScheduleBlackUnderrunFrame();
void RecordFramePacing(VideoIOCompletionResult completionResult); void RecordFramePacing(VideoIOCompletionResult completionResult);
void RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics); void RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics);
void RecordOutputRenderDuration(double renderMilliseconds); void RecordOutputRenderDuration(double renderMilliseconds, double acquireMilliseconds, double renderRequestMilliseconds, double endAccessMilliseconds);
bool ApplyLifecycleTransition(VideoBackendLifecycleState state, const std::string& message); bool ApplyLifecycleTransition(VideoBackendLifecycleState state, const std::string& message);
bool ApplyLifecycleFailure(const std::string& message); bool ApplyLifecycleFailure(const std::string& message);
void PublishBackendStateChanged(const std::string& state, const std::string& message); void PublishBackendStateChanged(const std::string& state, const std::string& message);
@@ -92,6 +100,7 @@ private:
RuntimeEventDispatcher& mRuntimeEventDispatcher; RuntimeEventDispatcher& mRuntimeEventDispatcher;
VideoBackendLifecycle mLifecycle; VideoBackendLifecycle mLifecycle;
VideoPlayoutPolicy mPlayoutPolicy; VideoPlayoutPolicy mPlayoutPolicy;
OutputProductionController mOutputProductionController;
RenderOutputQueue mReadyOutputQueue; RenderOutputQueue mReadyOutputQueue;
std::unique_ptr<VideoIODevice> mVideoIODevice; std::unique_ptr<VideoIODevice> mVideoIODevice;
std::unique_ptr<OpenGLVideoIOBridge> mBridge; std::unique_ptr<OpenGLVideoIOBridge> mBridge;
@@ -99,8 +108,17 @@ private:
std::condition_variable mOutputCompletionCondition; std::condition_variable mOutputCompletionCondition;
std::deque<VideoIOCompletion> mPendingOutputCompletions; std::deque<VideoIOCompletion> mPendingOutputCompletions;
std::thread mOutputCompletionWorker; std::thread mOutputCompletionWorker;
std::mutex mOutputProducerMutex;
std::condition_variable mOutputProducerCondition;
std::thread mOutputProducerWorker;
VideoIOCompletion mLastOutputProductionCompletion;
std::chrono::steady_clock::time_point mLastOutputProductionTime;
std::mutex mOutputProductionMutex;
mutable std::mutex mOutputMetricsMutex;
bool mOutputCompletionWorkerRunning = false; bool mOutputCompletionWorkerRunning = false;
bool mOutputCompletionWorkerStopping = false; bool mOutputCompletionWorkerStopping = false;
bool mOutputProducerWorkerRunning = false;
bool mOutputProducerWorkerStopping = false;
uint64_t mNextReadyOutputFrameIndex = 0; uint64_t mNextReadyOutputFrameIndex = 0;
uint64_t mInputFrameIndex = 0; uint64_t mInputFrameIndex = 0;
uint64_t mOutputFrameScheduleIndex = 0; uint64_t mOutputFrameScheduleIndex = 0;
@@ -121,6 +139,11 @@ private:
double mOutputRenderMilliseconds = 0.0; double mOutputRenderMilliseconds = 0.0;
double mSmoothedOutputRenderMilliseconds = 0.0; double mSmoothedOutputRenderMilliseconds = 0.0;
double mMaxOutputRenderMilliseconds = 0.0; double mMaxOutputRenderMilliseconds = 0.0;
double mOutputFrameAcquireMilliseconds = 0.0;
double mOutputFrameRenderRequestMilliseconds = 0.0;
double mOutputFrameEndAccessMilliseconds = 0.0;
uint64_t mLastLateStreak = 0;
uint64_t mLastDropStreak = 0;
uint64_t mLateFrameCount = 0; uint64_t mLateFrameCount = 0;
uint64_t mDroppedFrameCount = 0; uint64_t mDroppedFrameCount = 0;
uint64_t mFlushedFrameCount = 0; uint64_t mFlushedFrameCount = 0;

View File

@@ -30,5 +30,8 @@ inline VideoPlayoutPolicy NormalizeVideoPlayoutPolicy(VideoPlayoutPolicy policy)
policy.targetReadyFrames = 1; policy.targetReadyFrames = 1;
if (policy.maxReadyFrames < policy.targetReadyFrames) if (policy.maxReadyFrames < policy.targetReadyFrames)
policy.maxReadyFrames = policy.targetReadyFrames; policy.maxReadyFrames = policy.targetReadyFrames;
const unsigned minimumOutputFramePoolSize = policy.targetPrerollFrames + policy.maxReadyFrames + policy.minimumSpareDeviceFrames;
if (policy.outputFramePoolSize < minimumOutputFramePoolSize)
policy.outputFramePoolSize = minimumOutputFramePoolSize;
return policy; return policy;
} }

View File

@@ -423,7 +423,6 @@ bool DeckLinkSession::AcquireNextOutputVideoFrame(CComPtr<IDeckLinkMutableVideoF
return false; return false;
outputVideoFrame = outputVideoFrameQueue.front(); outputVideoFrame = outputVideoFrameQueue.front();
outputVideoFrameQueue.push_back(outputVideoFrame);
outputVideoFrameQueue.pop_front(); outputVideoFrameQueue.pop_front();
return outputVideoFrame != nullptr; return outputVideoFrame != nullptr;
} }
@@ -448,6 +447,7 @@ bool DeckLinkSession::PopulateOutputFrame(IDeckLinkMutableVideoFrame* outputVide
frame.width = mState.outputFrameSize.width; frame.width = mState.outputFrameSize.width;
frame.height = mState.outputFrameSize.height; frame.height = mState.outputFrameSize.height;
frame.pixelFormat = mState.outputPixelFormat; frame.pixelFormat = mState.outputPixelFormat;
outputVideoFrame->AddRef();
frame.nativeFrame = outputVideoFrame; frame.nativeFrame = outputVideoFrame;
frame.nativeBuffer = outputVideoFrameBuffer.Detach(); frame.nativeBuffer = outputVideoFrameBuffer.Detach();
return true; return true;
@@ -506,7 +506,10 @@ VideoPlayoutRecoveryDecision DeckLinkSession::AccountForCompletionResult(VideoIO
bool DeckLinkSession::ScheduleOutputFrame(const VideoIOOutputFrame& frame) bool DeckLinkSession::ScheduleOutputFrame(const VideoIOOutputFrame& frame)
{ {
IDeckLinkMutableVideoFrame* outputVideoFrame = static_cast<IDeckLinkMutableVideoFrame*>(frame.nativeFrame); IDeckLinkMutableVideoFrame* outputVideoFrame = static_cast<IDeckLinkMutableVideoFrame*>(frame.nativeFrame);
return ScheduleFrame(outputVideoFrame); const bool scheduled = ScheduleFrame(outputVideoFrame);
if (outputVideoFrame != nullptr)
outputVideoFrame->Release();
return scheduled;
} }
bool DeckLinkSession::Start() bool DeckLinkSession::Start()
@@ -616,8 +619,18 @@ void DeckLinkSession::HandleVideoInputFrame(IDeckLinkVideoInputFrame* inputFrame
inputFrameBuffer->EndAccess(bmdBufferAccessRead); inputFrameBuffer->EndAccess(bmdBufferAccessRead);
} }
void DeckLinkSession::HandlePlayoutFrameCompleted(IDeckLinkVideoFrame*, BMDOutputFrameCompletionResult completionResult) void DeckLinkSession::HandlePlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, BMDOutputFrameCompletionResult completionResult)
{ {
if (completedFrame != nullptr)
{
CComPtr<IDeckLinkMutableVideoFrame> reusableFrame;
if (completedFrame->QueryInterface(IID_IDeckLinkMutableVideoFrame, reinterpret_cast<void**>(&reusableFrame)) == S_OK &&
reusableFrame != nullptr)
{
outputVideoFrameQueue.push_back(reusableFrame);
}
}
if (!mOutputFrameCallback) if (!mOutputFrameCallback)
return; return;

View File

@@ -7,7 +7,7 @@ Phase 7 made backend lifecycle, playout policy, ready-frame queueing, late/drop
## Status ## Status
- Phase 7.5 design package: proposed. - Phase 7.5 design package: proposed.
- Phase 7.5 implementation: Step 2 in progress. - Phase 7.5 implementation: Step 5 in progress.
- Current alignment: Phase 7 is complete. `RenderOutputQueue`, `VideoPlayoutPolicy`, `VideoPlayoutScheduler`, `VideoBackendLifecycle`, and backend playout telemetry exist. The backend worker fills the ready queue on completion demand, but render production is not yet proactively driven by queue pressure or video cadence. - Current alignment: Phase 7 is complete. `RenderOutputQueue`, `VideoPlayoutPolicy`, `VideoPlayoutScheduler`, `VideoBackendLifecycle`, and backend playout telemetry exist. The backend worker fills the ready queue on completion demand, but render production is not yet proactively driven by queue pressure or video cadence.
Current footholds: Current footholds:
@@ -19,6 +19,9 @@ Current footholds:
- `HealthTelemetry::BackendPlayoutSnapshot` exposes queue depth, underruns, late/drop streaks, and recovery decisions. - `HealthTelemetry::BackendPlayoutSnapshot` exposes queue depth, underruns, late/drop streaks, and recovery decisions.
- Step 1 adds baseline timing fields for ready-queue min/max/zero-depth samples and output render duration. - Step 1 adds baseline timing fields for ready-queue min/max/zero-depth samples and output render duration.
- Step 2 adds a pure `OutputProductionController` for queue-pressure production decisions. - Step 2 adds a pure `OutputProductionController` for queue-pressure production decisions.
- Step 3 adds a proactive output producer worker that keeps `RenderOutputQueue` warm after playback starts.
- Step 4 skips non-forced preview presentation while output ready-queue depth is below target.
- Step 5 makes async readback misses prefer cached output over synchronous readback after bootstrap.
## Timing Review Findings ## Timing Review Findings
@@ -199,15 +202,23 @@ Move from demand-filled output production to queue-pressure production.
Initial target: Initial target:
- producer wakes when queue depth is below target - [x] producer wakes when queue depth is below target
- producer requests render-thread output production until target depth is reached - [x] producer requests render-thread output production until target depth is reached
- producer stops when backend stops or render thread shuts down - [x] producer stops when backend stops or render thread shuts down
- completion worker mostly schedules from already-ready frames - [x] completion worker mostly schedules from already-ready frames
Exit criteria: Exit criteria:
- normal playback does not depend on completion processing to fill the queue from empty - [x] normal playback does not depend on completion processing to fill the queue from empty
- callback/completion pressure and render production pressure are separate - [x] callback/completion pressure and render production pressure are separate
Implementation notes:
- `VideoBackend` starts the completion worker before device start, then starts the output producer only after DeckLink start succeeds. This avoids fighting DeckLink preroll for the same output frame pool.
- `OutputProducerWorkerMain()` periodically wakes and uses `OutputProductionController` to decide whether to produce, wait, or throttle.
- Completion handling records pacing/recovery, updates producer pressure, schedules a ready frame, and wakes the producer to refill headroom.
- Completion handling keeps a one-frame synchronous fallback when the ready queue is unexpectedly empty, then falls back to black underrun behavior if that also fails.
- Producer shutdown is explicit and joined before video output teardown.
### Step 4. Prioritize Playout Over Preview ### Step 4. Prioritize Playout Over Preview
@@ -215,15 +226,21 @@ Make preview explicitly subordinate to output playout deadlines.
Initial target: Initial target:
- skip or delay preview when ready queue depth is below target - [x] skip or delay preview when ready queue depth is below target
- count skipped previews - count skipped previews
- record preview present cost separately from output render cost - record preview present cost separately from output render cost
Exit criteria: Exit criteria:
- preview cannot drain output headroom invisibly - [x] preview cannot drain output headroom invisibly
- runtime telemetry shows preview skips and preview present cost - runtime telemetry shows preview skips and preview present cost
Implementation notes:
- `OpenGLComposite::paintGL(false)` now skips preview presentation when `VideoBackend` reports that the ready queue is below the target depth.
- Forced preview paints are still allowed so resize/manual paint behavior remains intact.
- Preview skip counters and present-cost telemetry remain follow-up work for this step.
### Step 5. Make Readback Miss Policy Deadline-Aware ### Step 5. Make Readback Miss Policy Deadline-Aware
Avoid turning a late async readback fence into synchronous deadline pressure by default. Avoid turning a late async readback fence into synchronous deadline pressure by default.
@@ -232,13 +249,20 @@ Initial target:
- count async readback misses - count async readback misses
- count synchronous fallback uses - count synchronous fallback uses
- allow policy to prefer stale/black output over synchronous fallback when queue pressure is high - [x] allow policy to prefer stale/black output over synchronous fallback when queue pressure is high
- keep current fallback available while behavior is measured - [x] keep current fallback available while behavior is measured
Exit criteria: Exit criteria:
- readback fallback is an explicit policy decision - [x] readback fallback is an explicit policy decision
- late GPU fences do not automatically block the most timing-sensitive path - [x] late GPU fences do not automatically block the most timing-sensitive path
Implementation notes:
- `OpenGLRenderPipeline::ReadOutputFrame()` now uses synchronous readback only to bootstrap the first cached output frame.
- After cached output exists, an async readback miss copies the cached output frame into the DeckLink output frame instead of blocking on synchronous `glReadPixels`.
- Async readback queueing now skips when the next PBO slot is still in flight rather than deleting an in-flight fence and overwriting it.
- Miss/fallback counters remain follow-up telemetry work for this step.
### Step 6. Tune Headroom Policy ### Step 6. Tune Headroom Policy

View File

@@ -0,0 +1,165 @@
# Phase 7.5 Readback Experiment Log
This log tracks short readback experiments during the proactive playout timing work.
## How To Run
The default debugger launch keeps the current production path:
- `Debug LoopThroughWithOpenGLCompositing`
- `VST_OUTPUT_READBACK_MODE` unset
- mode: `async_pbo`
The default mode can also be selected explicitly for comparison runs:
- `VST_OUTPUT_READBACK_MODE=async_pbo`
- uses the PBO/fence readback path, identical to leaving the variable unset
The experiment launches are:
- `Debug LoopThroughWithOpenGLCompositing - sync readback experiment`
- `VST_OUTPUT_READBACK_MODE=sync`
- uses direct synchronous `glReadPixels()` every output frame
- `Debug LoopThroughWithOpenGLCompositing - cached output experiment`
- `VST_OUTPUT_READBACK_MODE=cached_only`
- uses one bootstrap synchronous readback, then copies the cached output frame without ongoing GPU readback
The cached-output experiment is not visually correct for live motion. It exists to test whether removing ongoing GPU readback lets the producer fill the ready queue again.
## Experiment 3: fast_transfer
Status: removed from active code after hardware sample
Date: 2026-05-11
Change:
- DeckLink output frames are now created with `CreateVideoFrameWithBuffer()`.
- Output frame buffers are owned by `PinnedMemoryAllocator`.
- `VideoIOOutputFrame` carries a texture-transfer callback.
- The test branch changed the default render readback path to try `VideoFrameTransfer::GPUtoCPU` against the output texture for BGRA output.
- If fast transfer is unavailable or fails, the code falls back to cached output if present, then synchronous readback as a safety fallback.
Question:
Can SDK-style pinned/DVP transfer recover real rendered output timing without the visually-invalid cached-only shortcut?
Result:
- The test machine reported `GL_VENDOR=NVIDIA Corporation` and `GL_RENDERER=NVIDIA GeForce RTX 4060 Ti/PCIe/SSE2`.
- The DeckLink SDK OpenGL fast-transfer sample gates NVIDIA DVP on `GL_RENDERER` containing `Quadro`, so the DVP path is not enabled on this GeForce card.
- `GL_AMD_pinned_memory` was also unavailable.
- The fast-transfer path was removed from active code to avoid carrying unsupported DVP dependencies while we investigate CPU-frame buffering and render-ahead.
## Baseline: async_pbo
Date: 2026-05-11
Observed while the app was running after adding the async queue split counters.
Summary:
- ready queue was pinned at 0 or briefly 1
- underrun, zero-depth, late, and dropped counts increased continuously
- `renderRequestMs` usually sat around 16-25 ms, with occasional larger spikes
- `asyncQueueMs` was mostly explained by `asyncQueueReadPixelsMs`
- PBO allocation/orphaning was effectively 0 ms
Representative samples:
| readyDepth | renderRequestMs | queueWaitMs | drawMs | mapMs | copyMs | asyncQueueMs | asyncQueueBufferMs | asyncQueueReadPixelsMs |
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
| 0 | 24.915 | 3.018 | 0.510 | 0.923 | 0.768 | 9.018 | 0.000 | 9.001 |
| 0 | 16.226 | 3.066 | 0.518 | 1.202 | 0.812 | 8.611 | 0.000 | 8.598 |
| 0 | 12.134 | 3.796 | 3.579 | 1.378 | 0.690 | 10.323 | 0.000 | 10.311 |
| 0 | 17.496 | 2.817 | 0.523 | 1.267 | 1.160 | 9.416 | 0.000 | 9.403 |
Initial read:
The main repeated cost is issuing `glReadPixels(..., nullptr)` into the PBO. `glBufferData`, setup, fence creation, fence wait, map, and CPU copy are not large enough to explain the underruns.
## Experiment 1: sync
Status: sampled
Question:
Does the direct synchronous readback path perform better or worse than the current PBO path on this machine and DeckLink format?
Expected interpretation:
- If `syncReadMs` is lower than `asyncQueueReadPixelsMs` and the ready queue improves, the current PBO path is the wrong strategy for this driver/format.
- If `syncReadMs` is also high and the ready queue remains empty, any GPU-to-CPU readback in this path is too expensive for the current producer cadence.
Results:
Date: 2026-05-11
Summary:
- ready queue remained pinned at 0
- underrun, zero-depth, late, and dropped counts continued increasing
- `asyncQueueMs` and async readback counters were 0, confirming the experiment mode was active
- direct `syncReadMs` was generally worse than the baseline PBO `asyncQueueReadPixelsMs`
Representative samples:
| readyDepth | renderRequestMs | queueWaitMs | drawMs | syncReadMs | asyncQueueMs | syncFallbackCount |
| --- | ---: | ---: | ---: | ---: | ---: | ---: |
| 0 | 32.467 | 5.764 | 1.389 | 23.122 | 0.000 | 680 |
| 0 | 29.722 | 2.603 | 0.512 | 25.538 | 0.000 | 697 |
| 0 | 37.844 | 7.716 | 0.518 | 23.608 | 0.000 | 706 |
| 0 | 22.304 | 3.089 | 1.843 | 15.278 | 0.000 | 723 |
| 0 | 27.196 | 4.015 | 0.500 | 21.933 | 0.000 | 736 |
Read:
Direct synchronous readback does not recover the queue and is slower than the async PBO path on the sampled run. The bottleneck appears to be GPU-to-CPU readback itself, not PBO orphaning or fence handling.
## Experiment 2: cached_only
Status: sampled
Question:
If ongoing GPU readback is removed after bootstrap, can the producer keep the ready queue above 0?
Expected interpretation:
- If ready depth rises and underruns slow or stop, readback is the primary bottleneck.
- If ready depth still stays near 0, the bottleneck is elsewhere in scheduling, frame acquisition, queueing, or DeckLink handoff.
Results:
Date: 2026-05-11
User-visible result:
- DeckLink reported a healthy 5-frame buffer.
Telemetry summary:
- `renderRequestMs` dropped to roughly 1-3 ms.
- `cachedCopyMs` was usually around 0.8-1.0 ms, with one sampled low value around 0.37 ms.
- `asyncQueueMs`, `asyncQueueReadPixelsMs`, `syncReadMs`, fence wait, map, and async copy were 0 after bootstrap.
- `syncFallbackCount` stayed at 1, confirming one bootstrap readback.
- `cachedFallbackCount` increased continuously, confirming ongoing frames were served from cached CPU memory.
- late and dropped counts were 0 during the sampled run.
- internal ready queue depth still reported mostly 0-1 even while DeckLink showed a healthy hardware/device buffer.
Representative samples:
| readyDepth | renderRequestMs | queueWaitMs | drawMs | cachedCopyMs | asyncQueueMs | syncReadMs | late | dropped |
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
| 0 | 1.446 | 0.018 | 0.518 | 0.864 | 0.000 | 0.000 | 0 | 0 |
| 0 | 2.586 | 1.089 | 0.514 | 0.829 | 0.000 | 0.000 | 0 | 0 |
| 0 | 1.481 | 2.378 | 0.502 | 0.911 | 0.000 | 0.000 | 0 | 0 |
| 0 | 0.892 | 0.013 | 0.468 | 0.371 | 0.000 | 0.000 | 0 | 0 |
| 1 | 1.398 | 0.019 | 0.483 | 0.819 | 0.000 | 0.000 | 0 | 0 |
Read:
Removing ongoing GPU readback recovers output timing immediately. The direct cause of the Phase 7.5 playback collapse is the per-frame GPU-to-CPU readback cost, not DeckLink frame acquisition, output frame end-access, PBO allocation, fence waiting, or CPU copy.
The internal ready queue depth still being low while DeckLink reports a healthy device buffer suggests the ready queue is acting as a short staging queue rather than the full device playout buffer. For the next fix, prioritize avoiding a blocking readback on every output frame instead of only increasing internal ready queue depth.

View File

@@ -363,6 +363,10 @@ components:
$ref: "#/components/schemas/VideoIOStatus" $ref: "#/components/schemas/VideoIOStatus"
performance: performance:
$ref: "#/components/schemas/PerformanceStatus" $ref: "#/components/schemas/PerformanceStatus"
backendPlayout:
$ref: "#/components/schemas/BackendPlayoutStatus"
runtimeEvents:
$ref: "#/components/schemas/RuntimeEventStatus"
shaders: shaders:
type: array type: array
items: items:
@@ -382,10 +386,16 @@ components:
type: number type: number
oscPort: oscPort:
type: number type: number
oscBindAddress:
type: string
oscSmoothing:
type: number
autoReload: autoReload:
type: boolean type: boolean
maxTemporalHistoryFrames: maxTemporalHistoryFrames:
type: number type: number
previewFps:
type: number
enableExternalKeying: enableExternalKeying:
type: boolean type: boolean
inputVideoFormat: inputVideoFormat:
@@ -478,6 +488,175 @@ components:
type: number type: number
flushedFrameCount: flushedFrameCount:
type: number type: number
BackendPlayoutStatus:
type: object
properties:
lifecycleState:
type: string
example: running
degraded:
type: boolean
statusMessage:
type: string
lateFrameCount:
type: number
droppedFrameCount:
type: number
flushedFrameCount:
type: number
readyQueue:
$ref: "#/components/schemas/BackendReadyQueueStatus"
outputRender:
$ref: "#/components/schemas/BackendOutputRenderStatus"
recovery:
$ref: "#/components/schemas/BackendPlayoutRecoveryStatus"
BackendReadyQueueStatus:
type: object
properties:
depth:
type: number
description: Current number of ready output frames.
capacity:
type: number
description: Maximum ready output frames currently allowed.
minDepth:
type: number
description: Minimum observed ready queue depth since backend worker start.
maxDepth:
type: number
description: Maximum observed ready queue depth since backend worker start.
zeroDepthCount:
type: number
description: Number of observed samples where the ready queue was empty.
pushedCount:
type: number
poppedCount:
type: number
droppedCount:
type: number
underrunCount:
type: number
BackendOutputRenderStatus:
type: object
properties:
renderMs:
type: number
description: Most recent output render duration in milliseconds.
smoothedRenderMs:
type: number
description: Smoothed output render duration in milliseconds.
maxRenderMs:
type: number
description: Maximum observed output render duration in milliseconds.
acquireFrameMs:
type: number
description: Time spent acquiring a writable backend output frame in milliseconds.
renderRequestMs:
type: number
description: Time spent executing the render-thread output frame request in milliseconds.
endAccessMs:
type: number
description: Time spent ending write access to the backend output frame in milliseconds.
queueWaitMs:
type: number
description: Time the output render request spent waiting for the render thread in milliseconds.
drawMs:
type: number
description: Time spent drawing, blitting, packing, and flushing the output frame in milliseconds.
fenceWaitMs:
type: number
description: Time spent waiting for the async readback fence in milliseconds.
mapMs:
type: number
description: Time spent mapping the async readback pixel buffer in milliseconds.
readbackCopyMs:
type: number
description: Time spent copying async readback bytes into the backend output frame in milliseconds.
cachedCopyMs:
type: number
description: Time spent copying the cached output frame when async readback is not ready in milliseconds.
asyncQueueMs:
type: number
description: Time spent queueing the next async readback in milliseconds.
asyncQueueBufferMs:
type: number
description: Time spent orphaning or allocating the async readback pixel buffer in milliseconds.
asyncQueueSetupMs:
type: number
description: Time spent applying readback pixel-store, framebuffer, and pixel-pack-buffer state in milliseconds.
asyncQueueReadPixelsMs:
type: number
description: Time spent issuing glReadPixels for the async readback in milliseconds.
asyncQueueFenceMs:
type: number
description: Time spent creating the async readback fence in milliseconds.
syncReadMs:
type: number
description: Time spent in bootstrap synchronous readback in milliseconds.
asyncReadbackMissCount:
type: number
description: Count of output render requests where async readback was not ready.
cachedFallbackCount:
type: number
description: Count of output render requests served from the cached output frame.
syncFallbackCount:
type: number
description: Count of output render requests that used bootstrap synchronous readback.
BackendPlayoutRecoveryStatus:
type: object
properties:
completionResult:
type: string
enum: [Completed, DisplayedLate, Dropped, Flushed, Unknown]
completedFrameIndex:
type: number
scheduledFrameIndex:
type: number
scheduledLeadFrames:
type: number
measuredLagFrames:
type: number
catchUpFrames:
type: number
lateStreak:
type: number
dropStreak:
type: number
RuntimeEventStatus:
type: object
properties:
queue:
$ref: "#/components/schemas/RuntimeEventQueueStatus"
dispatch:
$ref: "#/components/schemas/RuntimeEventDispatchStatus"
RuntimeEventQueueStatus:
type: object
properties:
name:
type: string
depth:
type: number
capacity:
type: number
droppedCount:
type: number
oldestEventAgeMs:
type: number
RuntimeEventDispatchStatus:
type: object
properties:
dispatchCallCount:
type: number
dispatchedEventCount:
type: number
handlerInvocationCount:
type: number
handlerFailureCount:
type: number
lastDispatchDurationMs:
type: number
maxDispatchDurationMs:
type: number
ShaderSummary: ShaderSummary:
type: object type: object
properties: properties:
@@ -497,6 +676,8 @@ components:
description: Error text for unavailable shader packages. description: Error text for unavailable shader packages.
temporal: temporal:
$ref: "#/components/schemas/TemporalState" $ref: "#/components/schemas/TemporalState"
feedback:
$ref: "#/components/schemas/FeedbackState"
TemporalState: TemporalState:
type: object type: object
properties: properties:
@@ -509,6 +690,13 @@ components:
type: number type: number
effectiveHistoryLength: effectiveHistoryLength:
type: number type: number
FeedbackState:
type: object
properties:
enabled:
type: boolean
writePass:
type: string
LayerState: LayerState:
type: object type: object
properties: properties:

View File

@@ -95,6 +95,9 @@ void TestBackendPlayoutHealth()
8.5, 8.5,
7.25, 7.25,
12.0, 12.0,
1.0,
6.5,
0.5,
8, 8,
11, 11,
3, 3,
@@ -121,6 +124,9 @@ void TestBackendPlayoutHealth()
Expect(playout.outputRenderMilliseconds == 8.5, "backend playout health stores output render duration"); Expect(playout.outputRenderMilliseconds == 8.5, "backend playout health stores output render duration");
Expect(playout.smoothedOutputRenderMilliseconds == 7.25, "backend playout health stores smoothed output render duration"); Expect(playout.smoothedOutputRenderMilliseconds == 7.25, "backend playout health stores smoothed output render duration");
Expect(playout.maxOutputRenderMilliseconds == 12.0, "backend playout health stores max output render duration"); Expect(playout.maxOutputRenderMilliseconds == 12.0, "backend playout health stores max output render duration");
Expect(playout.outputFrameAcquireMilliseconds == 1.0, "backend playout health stores output frame acquire duration");
Expect(playout.outputFrameRenderRequestMilliseconds == 6.5, "backend playout health stores output render request duration");
Expect(playout.outputFrameEndAccessMilliseconds == 0.5, "backend playout health stores output frame end access duration");
Expect(playout.completedFrameIndex == 8, "backend playout health stores completed index"); Expect(playout.completedFrameIndex == 8, "backend playout health stores completed index");
Expect(playout.scheduledFrameIndex == 11, "backend playout health stores scheduled index"); Expect(playout.scheduledFrameIndex == 11, "backend playout health stores scheduled index");
Expect(playout.measuredLagFrames == 2, "backend playout health stores measured lag"); Expect(playout.measuredLagFrames == 2, "backend playout health stores measured lag");
@@ -148,6 +154,9 @@ void TestBackendPlayoutHealth()
-5.0, -5.0,
-4.0, -4.0,
-3.0, -3.0,
-2.0,
-1.0,
-0.5,
9, 9,
12, 12,
3, 3,
@@ -165,6 +174,32 @@ void TestBackendPlayoutHealth()
Expect(snapshot.backendPlayout.lifecycleState == "Running", "full health snapshot includes backend playout state"); Expect(snapshot.backendPlayout.lifecycleState == "Running", "full health snapshot includes backend playout state");
Expect(!snapshot.backendPlayout.degraded, "full health snapshot includes backend degraded state"); Expect(!snapshot.backendPlayout.degraded, "full health snapshot includes backend degraded state");
} }
void TestOutputRenderPipelineTiming()
{
HealthTelemetry telemetry;
telemetry.RecordOutputRenderQueueWait(2.5);
telemetry.RecordOutputRenderPipelineTiming(1.0, 0.5, 0.25, 0.75, 0.125, 0.375, 0.1, 0.2, 0.3, 0.4, 3.5, true, true, false);
Expect(telemetry.TryRecordOutputRenderPipelineTiming(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, false, false, true),
"try output render timing succeeds when uncontended");
const HealthTelemetry::BackendPlayoutSnapshot playout = telemetry.GetBackendPlayoutSnapshot();
Expect(playout.outputRenderQueueWaitMilliseconds == 2.5, "output render timing stores queue wait");
Expect(playout.outputRenderDrawMilliseconds == 0.0, "output render timing clamps draw duration");
Expect(playout.outputReadbackFenceWaitMilliseconds == 0.0, "output render timing clamps fence wait duration");
Expect(playout.outputReadbackMapMilliseconds == 0.0, "output render timing clamps map duration");
Expect(playout.outputReadbackCopyMilliseconds == 0.0, "output render timing clamps readback copy duration");
Expect(playout.outputCachedCopyMilliseconds == 0.0, "output render timing clamps cached copy duration");
Expect(playout.outputAsyncQueueMilliseconds == 0.0, "output render timing clamps async queue duration");
Expect(playout.outputAsyncQueueBufferMilliseconds == 0.0, "output render timing clamps async queue buffer duration");
Expect(playout.outputAsyncQueueSetupMilliseconds == 0.0, "output render timing clamps async queue setup duration");
Expect(playout.outputAsyncQueueReadPixelsMilliseconds == 0.0, "output render timing clamps async queue read pixels duration");
Expect(playout.outputAsyncQueueFenceMilliseconds == 0.0, "output render timing clamps async queue fence duration");
Expect(playout.outputSyncReadMilliseconds == 0.0, "output render timing clamps sync read duration");
Expect(playout.outputAsyncReadbackMissCount == 1, "output render timing counts async readback misses");
Expect(playout.outputCachedFallbackCount == 1, "output render timing counts cached fallbacks");
Expect(playout.outputSyncFallbackCount == 1, "output render timing counts sync fallbacks");
}
} }
int main() int main()
@@ -174,6 +209,7 @@ int main()
TestRuntimeEventTryRecord(); TestRuntimeEventTryRecord();
TestPersistenceWriteHealth(); TestPersistenceWriteHealth();
TestBackendPlayoutHealth(); TestBackendPlayoutHealth();
TestOutputRenderPipelineTiming();
if (gFailures != 0) if (gFailures != 0)
{ {

View File

@@ -93,9 +93,10 @@ void TestPolicyNormalization()
policy.maxReadyFrames = 2; policy.maxReadyFrames = 2;
VideoPlayoutPolicy normalized = NormalizeVideoPlayoutPolicy(policy); VideoPlayoutPolicy normalized = NormalizeVideoPlayoutPolicy(policy);
Expect(normalized.outputFramePoolSize == 1, "policy normalization keeps at least one output frame");
Expect(normalized.targetPrerollFrames == 1, "policy normalization keeps at least one preroll frame"); Expect(normalized.targetPrerollFrames == 1, "policy normalization keeps at least one preroll frame");
Expect(normalized.maxReadyFrames == normalized.targetReadyFrames, "policy normalization keeps max ready frames above target"); Expect(normalized.maxReadyFrames == normalized.targetReadyFrames, "policy normalization keeps max ready frames above target");
Expect(normalized.outputFramePoolSize >= normalized.targetPrerollFrames + normalized.maxReadyFrames + normalized.minimumSpareDeviceFrames,
"policy normalization keeps enough output frames for preroll and ready queue ownership");
} }
void TestFrameBudgets() void TestFrameBudgets()