Performance chasing
This commit is contained in:
44
.vscode/launch.json
vendored
44
.vscode/launch.json
vendored
@@ -17,6 +17,50 @@
|
|||||||
"moduleLoad": true
|
"moduleLoad": true
|
||||||
},
|
},
|
||||||
"preLaunchTask": "Build LoopThroughWithOpenGLCompositing Debug x64"
|
"preLaunchTask": "Build LoopThroughWithOpenGLCompositing Debug x64"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Debug LoopThroughWithOpenGLCompositing - sync readback experiment",
|
||||||
|
"type": "cppvsdbg",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug\\LoopThroughWithOpenGLCompositing.exe",
|
||||||
|
"args": [],
|
||||||
|
"stopAtEntry": false,
|
||||||
|
"cwd": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
|
||||||
|
"environment": [
|
||||||
|
{
|
||||||
|
"name": "VST_OUTPUT_READBACK_MODE",
|
||||||
|
"value": "sync"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"console": "internalConsole",
|
||||||
|
"symbolSearchPath": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
|
||||||
|
"requireExactSource": true,
|
||||||
|
"logging": {
|
||||||
|
"moduleLoad": true
|
||||||
|
},
|
||||||
|
"preLaunchTask": "Build LoopThroughWithOpenGLCompositing Debug x64"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Debug LoopThroughWithOpenGLCompositing - cached output experiment",
|
||||||
|
"type": "cppvsdbg",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug\\LoopThroughWithOpenGLCompositing.exe",
|
||||||
|
"args": [],
|
||||||
|
"stopAtEntry": false,
|
||||||
|
"cwd": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
|
||||||
|
"environment": [
|
||||||
|
{
|
||||||
|
"name": "VST_OUTPUT_READBACK_MODE",
|
||||||
|
"value": "cached_only"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"console": "internalConsole",
|
||||||
|
"symbolSearchPath": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
|
||||||
|
"requireExactSource": true,
|
||||||
|
"logging": {
|
||||||
|
"moduleLoad": true
|
||||||
|
},
|
||||||
|
"preLaunchTask": "Build LoopThroughWithOpenGLCompositing Debug x64"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ RenderEngine::RenderEngine(
|
|||||||
mRenderPass(mRenderer),
|
mRenderPass(mRenderer),
|
||||||
mRenderPipeline(mRenderer, runtimeSnapshotProvider, healthTelemetry, std::move(renderEffect), std::move(screenshotReady), std::move(previewPaint)),
|
mRenderPipeline(mRenderer, runtimeSnapshotProvider, healthTelemetry, std::move(renderEffect), std::move(screenshotReady), std::move(previewPaint)),
|
||||||
mShaderPrograms(mRenderer, runtimeSnapshotProvider),
|
mShaderPrograms(mRenderer, runtimeSnapshotProvider),
|
||||||
|
mHealthTelemetry(healthTelemetry),
|
||||||
mHdc(hdc),
|
mHdc(hdc),
|
||||||
mHglrc(hglrc),
|
mHglrc(hglrc),
|
||||||
mFrameStateResolver(runtimeSnapshotProvider)
|
mFrameStateResolver(runtimeSnapshotProvider)
|
||||||
@@ -546,7 +547,11 @@ bool RenderEngine::RequestOutputFrame(const RenderPipelineFrameContext& context,
|
|||||||
{
|
{
|
||||||
if (mRenderThreadRunning)
|
if (mRenderThreadRunning)
|
||||||
{
|
{
|
||||||
return TryInvokeOnRenderThread("output-render", [this, &context, &outputFrame]() {
|
const auto queuedAt = std::chrono::steady_clock::now();
|
||||||
|
return TryInvokeOnRenderThread("output-render", [this, &context, &outputFrame, queuedAt]() {
|
||||||
|
const auto startedAt = std::chrono::steady_clock::now();
|
||||||
|
const double queueWaitMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(startedAt - queuedAt).count();
|
||||||
|
mHealthTelemetry.TryRecordOutputRenderQueueWait(queueWaitMilliseconds);
|
||||||
mRenderCommandQueue.RequestOutputFrame({ context.videoState, context.completion });
|
mRenderCommandQueue.RequestOutputFrame({ context.videoState, context.completion });
|
||||||
RenderOutputFrameRequest request;
|
RenderOutputFrameRequest request;
|
||||||
return mRenderCommandQueue.TryTakeOutputFrame(request) &&
|
return mRenderCommandQueue.TryTakeOutputFrame(request) &&
|
||||||
|
|||||||
@@ -209,6 +209,7 @@ private:
|
|||||||
OpenGLRenderPass mRenderPass;
|
OpenGLRenderPass mRenderPass;
|
||||||
OpenGLRenderPipeline mRenderPipeline;
|
OpenGLRenderPipeline mRenderPipeline;
|
||||||
OpenGLShaderPrograms mShaderPrograms;
|
OpenGLShaderPrograms mShaderPrograms;
|
||||||
|
HealthTelemetry& mHealthTelemetry;
|
||||||
HDC mHdc;
|
HDC mHdc;
|
||||||
HGLRC mHglrc;
|
HGLRC mHglrc;
|
||||||
|
|
||||||
|
|||||||
@@ -164,6 +164,9 @@ error:
|
|||||||
|
|
||||||
void OpenGLComposite::paintGL(bool force)
|
void OpenGLComposite::paintGL(bool force)
|
||||||
{
|
{
|
||||||
|
if (mRuntimeUpdateController)
|
||||||
|
mRuntimeUpdateController->ProcessRuntimeWork();
|
||||||
|
|
||||||
if (!force)
|
if (!force)
|
||||||
{
|
{
|
||||||
if (IsIconic(hGLWnd))
|
if (IsIconic(hGLWnd))
|
||||||
@@ -171,6 +174,12 @@ void OpenGLComposite::paintGL(bool force)
|
|||||||
}
|
}
|
||||||
|
|
||||||
const unsigned previewFps = mRuntimeStore ? mRuntimeStore->GetConfiguredPreviewFps() : 30u;
|
const unsigned previewFps = mRuntimeStore ? mRuntimeStore->GetConfiguredPreviewFps() : 30u;
|
||||||
|
if (!force && mVideoBackend && mVideoBackend->ShouldPrioritizeOutputOverPreview())
|
||||||
|
{
|
||||||
|
ValidateRect(hGLWnd, NULL);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (!mRenderEngine->TryPresentPreview(force, previewFps, mVideoBackend->OutputFrameWidth(), mVideoBackend->OutputFrameHeight()))
|
if (!mRenderEngine->TryPresentPreview(force, previewFps, mVideoBackend->OutputFrameWidth(), mVideoBackend->OutputFrameHeight()))
|
||||||
{
|
{
|
||||||
ValidateRect(hGLWnd, NULL);
|
ValidateRect(hGLWnd, NULL);
|
||||||
@@ -261,6 +270,9 @@ bool OpenGLComposite::Start()
|
|||||||
if (!mRenderEngine->StartRenderThread())
|
if (!mRenderEngine->StartRenderThread())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
if (mRuntimeUpdateController)
|
||||||
|
mRuntimeUpdateController->ProcessRuntimeWork();
|
||||||
|
|
||||||
if (mVideoBackend->Start())
|
if (mVideoBackend->Start())
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
@@ -351,9 +363,6 @@ bool OpenGLComposite::RequestScreenshot(std::string& error)
|
|||||||
|
|
||||||
void OpenGLComposite::renderEffect()
|
void OpenGLComposite::renderEffect()
|
||||||
{
|
{
|
||||||
if (mRuntimeUpdateController)
|
|
||||||
mRuntimeUpdateController->ProcessRuntimeWork();
|
|
||||||
|
|
||||||
const RenderFrameInput frameInput = BuildRenderFrameInput();
|
const RenderFrameInput frameInput = BuildRenderFrameInput();
|
||||||
RenderFrame(frameInput);
|
RenderFrame(frameInput);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,7 +8,9 @@
|
|||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <cstdlib>
|
||||||
#include <gl/gl.h>
|
#include <gl/gl.h>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
OpenGLRenderPipeline::OpenGLRenderPipeline(
|
OpenGLRenderPipeline::OpenGLRenderPipeline(
|
||||||
OpenGLRenderer& renderer,
|
OpenGLRenderer& renderer,
|
||||||
@@ -22,7 +24,8 @@ OpenGLRenderPipeline::OpenGLRenderPipeline(
|
|||||||
mHealthTelemetry(healthTelemetry),
|
mHealthTelemetry(healthTelemetry),
|
||||||
mRenderEffect(renderEffect),
|
mRenderEffect(renderEffect),
|
||||||
mOutputReady(outputReady),
|
mOutputReady(outputReady),
|
||||||
mPaint(paint)
|
mPaint(paint),
|
||||||
|
mOutputReadbackMode(ReadOutputReadbackModeFromEnvironment())
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -53,9 +56,22 @@ bool OpenGLRenderPipeline::RenderFrame(const RenderPipelineFrameContext& context
|
|||||||
mHealthTelemetry.TryRecordPerformanceStats(state.frameBudgetMilliseconds, renderMilliseconds);
|
mHealthTelemetry.TryRecordPerformanceStats(state.frameBudgetMilliseconds, renderMilliseconds);
|
||||||
mRuntimeSnapshotProvider.AdvanceFrame();
|
mRuntimeSnapshotProvider.AdvanceFrame();
|
||||||
|
|
||||||
ReadOutputFrame(state, outputFrame);
|
OutputReadbackTiming readbackTiming = ReadOutputFrame(state, outputFrame);
|
||||||
if (mPaint)
|
mHealthTelemetry.TryRecordOutputRenderPipelineTiming(
|
||||||
mPaint();
|
renderMilliseconds,
|
||||||
|
readbackTiming.fenceWaitMilliseconds,
|
||||||
|
readbackTiming.mapMilliseconds,
|
||||||
|
readbackTiming.copyMilliseconds,
|
||||||
|
readbackTiming.cachedCopyMilliseconds,
|
||||||
|
readbackTiming.asyncQueueMilliseconds,
|
||||||
|
readbackTiming.asyncQueueBufferMilliseconds,
|
||||||
|
readbackTiming.asyncQueueSetupMilliseconds,
|
||||||
|
readbackTiming.asyncQueueReadPixelsMilliseconds,
|
||||||
|
readbackTiming.asyncQueueFenceMilliseconds,
|
||||||
|
readbackTiming.syncReadMilliseconds,
|
||||||
|
readbackTiming.asyncReadbackMissed,
|
||||||
|
readbackTiming.cachedFallbackUsed,
|
||||||
|
readbackTiming.syncFallbackUsed);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -151,8 +167,9 @@ void OpenGLRenderPipeline::FlushAsyncReadbackPipeline()
|
|||||||
mAsyncReadbackReadIndex = 0;
|
mAsyncReadbackReadIndex = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state)
|
bool OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state, OutputReadbackTiming& timing)
|
||||||
{
|
{
|
||||||
|
const auto queueStartTime = std::chrono::steady_clock::now();
|
||||||
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
|
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
|
||||||
const std::size_t requiredBytes = static_cast<std::size_t>(state.outputFrameRowBytes) * state.outputFrameSize.height;
|
const std::size_t requiredBytes = static_cast<std::size_t>(state.outputFrameRowBytes) * state.outputFrameSize.height;
|
||||||
const GLenum format = usePackedOutput ? GL_RGBA : GL_BGRA;
|
const GLenum format = usePackedOutput ? GL_RGBA : GL_BGRA;
|
||||||
@@ -161,8 +178,16 @@ void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state)
|
|||||||
const GLsizei readWidth = static_cast<GLsizei>(usePackedOutput ? state.outputPackTextureWidth : state.outputFrameSize.width);
|
const GLsizei readWidth = static_cast<GLsizei>(usePackedOutput ? state.outputPackTextureWidth : state.outputFrameSize.width);
|
||||||
const GLsizei readHeight = static_cast<GLsizei>(state.outputFrameSize.height);
|
const GLsizei readHeight = static_cast<GLsizei>(state.outputFrameSize.height);
|
||||||
|
|
||||||
|
const auto finishTiming = [&timing, queueStartTime]() {
|
||||||
|
const auto queueEndTime = std::chrono::steady_clock::now();
|
||||||
|
timing.asyncQueueMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(queueEndTime - queueStartTime).count();
|
||||||
|
};
|
||||||
|
|
||||||
if (requiredBytes == 0)
|
if (requiredBytes == 0)
|
||||||
return;
|
{
|
||||||
|
finishTiming();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (mAsyncReadbackBytes != requiredBytes
|
if (mAsyncReadbackBytes != requiredBytes
|
||||||
|| mAsyncReadbackFormat != format
|
|| mAsyncReadbackFormat != format
|
||||||
@@ -173,30 +198,50 @@ void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state)
|
|||||||
mAsyncReadbackType = type;
|
mAsyncReadbackType = type;
|
||||||
mAsyncReadbackFramebuffer = framebuffer;
|
mAsyncReadbackFramebuffer = framebuffer;
|
||||||
if (!EnsureAsyncReadbackBuffers(requiredBytes))
|
if (!EnsureAsyncReadbackBuffers(requiredBytes))
|
||||||
return;
|
{
|
||||||
|
finishTiming();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
AsyncReadbackSlot& slot = mAsyncReadbackSlots[mAsyncReadbackWriteIndex];
|
AsyncReadbackSlot& slot = mAsyncReadbackSlots[mAsyncReadbackWriteIndex];
|
||||||
if (slot.fence != nullptr)
|
if (slot.inFlight)
|
||||||
{
|
{
|
||||||
glDeleteSync(slot.fence);
|
finishTiming();
|
||||||
slot.fence = nullptr;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto stageStartTime = std::chrono::steady_clock::now();
|
||||||
glPixelStorei(GL_PACK_ALIGNMENT, 4);
|
glPixelStorei(GL_PACK_ALIGNMENT, 4);
|
||||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
|
glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
|
||||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
|
||||||
|
auto stageEndTime = std::chrono::steady_clock::now();
|
||||||
|
timing.asyncQueueSetupMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
|
||||||
|
|
||||||
|
stageStartTime = std::chrono::steady_clock::now();
|
||||||
glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(requiredBytes), nullptr, GL_STREAM_READ);
|
glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(requiredBytes), nullptr, GL_STREAM_READ);
|
||||||
|
stageEndTime = std::chrono::steady_clock::now();
|
||||||
|
timing.asyncQueueBufferMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
|
||||||
|
|
||||||
|
stageStartTime = std::chrono::steady_clock::now();
|
||||||
glReadPixels(0, 0, readWidth, readHeight, format, type, nullptr);
|
glReadPixels(0, 0, readWidth, readHeight, format, type, nullptr);
|
||||||
|
stageEndTime = std::chrono::steady_clock::now();
|
||||||
|
timing.asyncQueueReadPixelsMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
|
||||||
|
|
||||||
|
stageStartTime = std::chrono::steady_clock::now();
|
||||||
slot.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
slot.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||||
|
stageEndTime = std::chrono::steady_clock::now();
|
||||||
|
timing.asyncQueueFenceMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
|
||||||
slot.inFlight = slot.fence != nullptr;
|
slot.inFlight = slot.fence != nullptr;
|
||||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||||
|
|
||||||
mAsyncReadbackWriteIndex = (mAsyncReadbackWriteIndex + 1) % mAsyncReadbackSlots.size();
|
mAsyncReadbackWriteIndex = (mAsyncReadbackWriteIndex + 1) % mAsyncReadbackSlots.size();
|
||||||
|
finishTiming();
|
||||||
|
return slot.inFlight;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds)
|
bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds, OutputReadbackTiming& timing)
|
||||||
{
|
{
|
||||||
if (mAsyncReadbackBytes == 0 || outputFrame.bytes == nullptr)
|
if (mAsyncReadbackBytes == 0 || outputFrame.bytes == nullptr)
|
||||||
return false;
|
return false;
|
||||||
@@ -206,15 +251,24 @@ bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFra
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
const GLenum waitFlags = timeoutNanoseconds > 0 ? GL_SYNC_FLUSH_COMMANDS_BIT : 0;
|
const GLenum waitFlags = timeoutNanoseconds > 0 ? GL_SYNC_FLUSH_COMMANDS_BIT : 0;
|
||||||
|
const auto waitStartTime = std::chrono::steady_clock::now();
|
||||||
const GLenum waitResult = glClientWaitSync(slot.fence, waitFlags, timeoutNanoseconds);
|
const GLenum waitResult = glClientWaitSync(slot.fence, waitFlags, timeoutNanoseconds);
|
||||||
|
const auto waitEndTime = std::chrono::steady_clock::now();
|
||||||
|
timing.fenceWaitMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(waitEndTime - waitStartTime).count();
|
||||||
if (waitResult != GL_ALREADY_SIGNALED && waitResult != GL_CONDITION_SATISFIED)
|
if (waitResult != GL_ALREADY_SIGNALED && waitResult != GL_CONDITION_SATISFIED)
|
||||||
|
{
|
||||||
|
timing.asyncReadbackMissed = true;
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
glDeleteSync(slot.fence);
|
glDeleteSync(slot.fence);
|
||||||
slot.fence = nullptr;
|
slot.fence = nullptr;
|
||||||
|
|
||||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
|
||||||
|
const auto mapStartTime = std::chrono::steady_clock::now();
|
||||||
void* mappedBytes = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
|
void* mappedBytes = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
|
||||||
|
const auto mapEndTime = std::chrono::steady_clock::now();
|
||||||
|
timing.mapMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(mapEndTime - mapStartTime).count();
|
||||||
if (mappedBytes == nullptr)
|
if (mappedBytes == nullptr)
|
||||||
{
|
{
|
||||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||||
@@ -223,7 +277,10 @@ bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFra
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const auto copyStartTime = std::chrono::steady_clock::now();
|
||||||
std::memcpy(outputFrame.bytes, mappedBytes, slot.sizeBytes);
|
std::memcpy(outputFrame.bytes, mappedBytes, slot.sizeBytes);
|
||||||
|
const auto copyEndTime = std::chrono::steady_clock::now();
|
||||||
|
timing.copyMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(copyEndTime - copyStartTime).count();
|
||||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||||
|
|
||||||
@@ -243,8 +300,26 @@ void OpenGLRenderPipeline::CacheOutputFrame(const VideoIOOutputFrame& outputFram
|
|||||||
std::memcpy(mCachedOutputFrame.data(), outputFrame.bytes, byteCount);
|
std::memcpy(mCachedOutputFrame.data(), outputFrame.bytes, byteCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes)
|
bool OpenGLRenderPipeline::TryCopyCachedOutputFrame(VideoIOOutputFrame& outputFrame, OutputReadbackTiming& timing) const
|
||||||
{
|
{
|
||||||
|
if (outputFrame.bytes == nullptr || outputFrame.height == 0 || outputFrame.rowBytes <= 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const std::size_t byteCount = static_cast<std::size_t>(outputFrame.rowBytes) * outputFrame.height;
|
||||||
|
if (mCachedOutputFrame.size() != byteCount)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
const auto copyStartTime = std::chrono::steady_clock::now();
|
||||||
|
std::memcpy(outputFrame.bytes, mCachedOutputFrame.data(), byteCount);
|
||||||
|
const auto copyEndTime = std::chrono::steady_clock::now();
|
||||||
|
timing.cachedCopyMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(copyEndTime - copyStartTime).count();
|
||||||
|
timing.cachedFallbackUsed = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes, OutputReadbackTiming& timing)
|
||||||
|
{
|
||||||
|
const auto readStartTime = std::chrono::steady_clock::now();
|
||||||
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
|
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
|
||||||
|
|
||||||
glPixelStorei(GL_PACK_ALIGNMENT, 4);
|
glPixelStorei(GL_PACK_ALIGNMENT, 4);
|
||||||
@@ -259,24 +334,78 @@ void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& stat
|
|||||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputFramebuffer());
|
glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputFramebuffer());
|
||||||
glReadPixels(0, 0, state.outputFrameSize.width, state.outputFrameSize.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, destinationBytes);
|
glReadPixels(0, 0, state.outputFrameSize.width, state.outputFrameSize.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, destinationBytes);
|
||||||
}
|
}
|
||||||
|
const auto readEndTime = std::chrono::steady_clock::now();
|
||||||
|
timing.syncReadMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(readEndTime - readStartTime).count();
|
||||||
|
timing.syncFallbackUsed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void OpenGLRenderPipeline::ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame)
|
OpenGLRenderPipeline::OutputReadbackTiming OpenGLRenderPipeline::ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame)
|
||||||
{
|
{
|
||||||
if (TryConsumeAsyncReadback(outputFrame, 500000))
|
OutputReadbackTiming timing;
|
||||||
{
|
|
||||||
QueueAsyncReadback(state);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If async readback misses the playout deadline, prefer a fresh synchronous
|
if (mOutputReadbackMode == OutputReadbackMode::Synchronous)
|
||||||
// frame over reusing stale cached output, then restart the async pipeline.
|
{
|
||||||
if (outputFrame.bytes != nullptr)
|
if (outputFrame.bytes != nullptr)
|
||||||
{
|
{
|
||||||
ReadOutputFrameSynchronously(state, outputFrame.bytes);
|
ReadOutputFrameSynchronously(state, outputFrame.bytes, timing);
|
||||||
|
CacheOutputFrame(outputFrame);
|
||||||
|
}
|
||||||
|
return timing;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mOutputReadbackMode == OutputReadbackMode::CachedOnly)
|
||||||
|
{
|
||||||
|
if (TryCopyCachedOutputFrame(outputFrame, timing))
|
||||||
|
return timing;
|
||||||
|
|
||||||
|
if (outputFrame.bytes != nullptr)
|
||||||
|
{
|
||||||
|
ReadOutputFrameSynchronously(state, outputFrame.bytes, timing);
|
||||||
|
CacheOutputFrame(outputFrame);
|
||||||
|
}
|
||||||
|
return timing;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TryConsumeAsyncReadback(outputFrame, 500000, timing))
|
||||||
|
{
|
||||||
|
(void)QueueAsyncReadback(state, timing);
|
||||||
|
return timing;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TryCopyCachedOutputFrame(outputFrame, timing))
|
||||||
|
{
|
||||||
|
(void)QueueAsyncReadback(state, timing);
|
||||||
|
return timing;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bootstrap only: until the first async readback has produced cached output,
|
||||||
|
// use one synchronous readback so DeckLink has a valid frame to schedule.
|
||||||
|
if (outputFrame.bytes != nullptr && mCachedOutputFrame.empty())
|
||||||
|
{
|
||||||
|
ReadOutputFrameSynchronously(state, outputFrame.bytes, timing);
|
||||||
CacheOutputFrame(outputFrame);
|
CacheOutputFrame(outputFrame);
|
||||||
}
|
}
|
||||||
|
|
||||||
FlushAsyncReadbackPipeline();
|
FlushAsyncReadbackPipeline();
|
||||||
QueueAsyncReadback(state);
|
(void)QueueAsyncReadback(state, timing);
|
||||||
|
return timing;
|
||||||
|
}
|
||||||
|
|
||||||
|
OpenGLRenderPipeline::OutputReadbackMode OpenGLRenderPipeline::ReadOutputReadbackModeFromEnvironment()
|
||||||
|
{
|
||||||
|
char* mode = nullptr;
|
||||||
|
std::size_t modeSize = 0;
|
||||||
|
if (_dupenv_s(&mode, &modeSize, "VST_OUTPUT_READBACK_MODE") != 0 || mode == nullptr)
|
||||||
|
return OutputReadbackMode::AsyncPbo;
|
||||||
|
|
||||||
|
const std::string modeValue(mode);
|
||||||
|
std::free(mode);
|
||||||
|
if (modeValue == "async_pbo")
|
||||||
|
return OutputReadbackMode::AsyncPbo;
|
||||||
|
if (modeValue == "sync")
|
||||||
|
return OutputReadbackMode::Synchronous;
|
||||||
|
if (modeValue == "cached_only")
|
||||||
|
return OutputReadbackMode::CachedOnly;
|
||||||
|
|
||||||
|
return OutputReadbackMode::AsyncPbo;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -36,6 +36,13 @@ public:
|
|||||||
bool RenderFrame(const RenderPipelineFrameContext& context, VideoIOOutputFrame& outputFrame);
|
bool RenderFrame(const RenderPipelineFrameContext& context, VideoIOOutputFrame& outputFrame);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
enum class OutputReadbackMode
|
||||||
|
{
|
||||||
|
AsyncPbo,
|
||||||
|
Synchronous,
|
||||||
|
CachedOnly
|
||||||
|
};
|
||||||
|
|
||||||
struct AsyncReadbackSlot
|
struct AsyncReadbackSlot
|
||||||
{
|
{
|
||||||
GLuint pixelPackBuffer = 0;
|
GLuint pixelPackBuffer = 0;
|
||||||
@@ -44,15 +51,34 @@ private:
|
|||||||
bool inFlight = false;
|
bool inFlight = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct OutputReadbackTiming
|
||||||
|
{
|
||||||
|
double fenceWaitMilliseconds = 0.0;
|
||||||
|
double mapMilliseconds = 0.0;
|
||||||
|
double copyMilliseconds = 0.0;
|
||||||
|
double cachedCopyMilliseconds = 0.0;
|
||||||
|
double asyncQueueMilliseconds = 0.0;
|
||||||
|
double asyncQueueBufferMilliseconds = 0.0;
|
||||||
|
double asyncQueueSetupMilliseconds = 0.0;
|
||||||
|
double asyncQueueReadPixelsMilliseconds = 0.0;
|
||||||
|
double asyncQueueFenceMilliseconds = 0.0;
|
||||||
|
double syncReadMilliseconds = 0.0;
|
||||||
|
bool asyncReadbackMissed = false;
|
||||||
|
bool cachedFallbackUsed = false;
|
||||||
|
bool syncFallbackUsed = false;
|
||||||
|
};
|
||||||
|
|
||||||
bool EnsureAsyncReadbackBuffers(std::size_t requiredBytes);
|
bool EnsureAsyncReadbackBuffers(std::size_t requiredBytes);
|
||||||
void ResetAsyncReadbackState();
|
void ResetAsyncReadbackState();
|
||||||
void FlushAsyncReadbackPipeline();
|
void FlushAsyncReadbackPipeline();
|
||||||
void QueueAsyncReadback(const VideoIOState& state);
|
bool QueueAsyncReadback(const VideoIOState& state, OutputReadbackTiming& timing);
|
||||||
bool TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds);
|
bool TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds, OutputReadbackTiming& timing);
|
||||||
void CacheOutputFrame(const VideoIOOutputFrame& outputFrame);
|
void CacheOutputFrame(const VideoIOOutputFrame& outputFrame);
|
||||||
void ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes);
|
bool TryCopyCachedOutputFrame(VideoIOOutputFrame& outputFrame, OutputReadbackTiming& timing) const;
|
||||||
|
void ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes, OutputReadbackTiming& timing);
|
||||||
void PackOutputFor10Bit(const VideoIOState& state);
|
void PackOutputFor10Bit(const VideoIOState& state);
|
||||||
void ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame);
|
OutputReadbackTiming ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame);
|
||||||
|
static OutputReadbackMode ReadOutputReadbackModeFromEnvironment();
|
||||||
|
|
||||||
OpenGLRenderer& mRenderer;
|
OpenGLRenderer& mRenderer;
|
||||||
RuntimeSnapshotProvider& mRuntimeSnapshotProvider;
|
RuntimeSnapshotProvider& mRuntimeSnapshotProvider;
|
||||||
@@ -60,6 +86,7 @@ private:
|
|||||||
RenderEffectCallback mRenderEffect;
|
RenderEffectCallback mRenderEffect;
|
||||||
OutputReadyCallback mOutputReady;
|
OutputReadyCallback mOutputReady;
|
||||||
PaintCallback mPaint;
|
PaintCallback mPaint;
|
||||||
|
OutputReadbackMode mOutputReadbackMode = OutputReadbackMode::AsyncPbo;
|
||||||
std::array<AsyncReadbackSlot, 3> mAsyncReadbackSlots;
|
std::array<AsyncReadbackSlot, 3> mAsyncReadbackSlots;
|
||||||
std::size_t mAsyncReadbackWriteIndex = 0;
|
std::size_t mAsyncReadbackWriteIndex = 0;
|
||||||
std::size_t mAsyncReadbackReadIndex = 0;
|
std::size_t mAsyncReadbackReadIndex = 0;
|
||||||
|
|||||||
@@ -95,6 +95,24 @@ JsonValue RuntimeStatePresenter::BuildRuntimeStateValue(const RuntimeStore& runt
|
|||||||
outputRender.set("renderMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderMilliseconds));
|
outputRender.set("renderMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderMilliseconds));
|
||||||
outputRender.set("smoothedRenderMs", JsonValue(telemetrySnapshot.backendPlayout.smoothedOutputRenderMilliseconds));
|
outputRender.set("smoothedRenderMs", JsonValue(telemetrySnapshot.backendPlayout.smoothedOutputRenderMilliseconds));
|
||||||
outputRender.set("maxRenderMs", JsonValue(telemetrySnapshot.backendPlayout.maxOutputRenderMilliseconds));
|
outputRender.set("maxRenderMs", JsonValue(telemetrySnapshot.backendPlayout.maxOutputRenderMilliseconds));
|
||||||
|
outputRender.set("acquireFrameMs", JsonValue(telemetrySnapshot.backendPlayout.outputFrameAcquireMilliseconds));
|
||||||
|
outputRender.set("renderRequestMs", JsonValue(telemetrySnapshot.backendPlayout.outputFrameRenderRequestMilliseconds));
|
||||||
|
outputRender.set("endAccessMs", JsonValue(telemetrySnapshot.backendPlayout.outputFrameEndAccessMilliseconds));
|
||||||
|
outputRender.set("queueWaitMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderQueueWaitMilliseconds));
|
||||||
|
outputRender.set("drawMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderDrawMilliseconds));
|
||||||
|
outputRender.set("fenceWaitMs", JsonValue(telemetrySnapshot.backendPlayout.outputReadbackFenceWaitMilliseconds));
|
||||||
|
outputRender.set("mapMs", JsonValue(telemetrySnapshot.backendPlayout.outputReadbackMapMilliseconds));
|
||||||
|
outputRender.set("readbackCopyMs", JsonValue(telemetrySnapshot.backendPlayout.outputReadbackCopyMilliseconds));
|
||||||
|
outputRender.set("cachedCopyMs", JsonValue(telemetrySnapshot.backendPlayout.outputCachedCopyMilliseconds));
|
||||||
|
outputRender.set("asyncQueueMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueMilliseconds));
|
||||||
|
outputRender.set("asyncQueueBufferMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueBufferMilliseconds));
|
||||||
|
outputRender.set("asyncQueueSetupMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueSetupMilliseconds));
|
||||||
|
outputRender.set("asyncQueueReadPixelsMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueReadPixelsMilliseconds));
|
||||||
|
outputRender.set("asyncQueueFenceMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueFenceMilliseconds));
|
||||||
|
outputRender.set("syncReadMs", JsonValue(telemetrySnapshot.backendPlayout.outputSyncReadMilliseconds));
|
||||||
|
outputRender.set("asyncReadbackMissCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.outputAsyncReadbackMissCount)));
|
||||||
|
outputRender.set("cachedFallbackCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.outputCachedFallbackCount)));
|
||||||
|
outputRender.set("syncFallbackCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.outputSyncFallbackCount)));
|
||||||
|
|
||||||
JsonValue recovery = JsonValue::MakeObject();
|
JsonValue recovery = JsonValue::MakeObject();
|
||||||
recovery.set("completionResult", JsonValue(telemetrySnapshot.backendPlayout.completionResult));
|
recovery.set("completionResult", JsonValue(telemetrySnapshot.backendPlayout.completionResult));
|
||||||
|
|||||||
@@ -212,6 +212,7 @@ void HealthTelemetry::RecordBackendPlayoutHealth(const std::string& lifecycleSta
|
|||||||
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
|
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
|
||||||
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
|
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
|
||||||
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
|
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
|
||||||
|
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
|
||||||
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
|
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
|
||||||
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
|
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
|
||||||
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
|
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
|
||||||
@@ -232,6 +233,9 @@ void HealthTelemetry::RecordBackendPlayoutHealth(const std::string& lifecycleSta
|
|||||||
mBackendPlayout.outputRenderMilliseconds = std::max(outputRenderMilliseconds, 0.0);
|
mBackendPlayout.outputRenderMilliseconds = std::max(outputRenderMilliseconds, 0.0);
|
||||||
mBackendPlayout.smoothedOutputRenderMilliseconds = std::max(smoothedOutputRenderMilliseconds, 0.0);
|
mBackendPlayout.smoothedOutputRenderMilliseconds = std::max(smoothedOutputRenderMilliseconds, 0.0);
|
||||||
mBackendPlayout.maxOutputRenderMilliseconds = std::max(maxOutputRenderMilliseconds, 0.0);
|
mBackendPlayout.maxOutputRenderMilliseconds = std::max(maxOutputRenderMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputFrameAcquireMilliseconds = std::max(outputFrameAcquireMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputFrameRenderRequestMilliseconds = std::max(outputFrameRenderRequestMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputFrameEndAccessMilliseconds = std::max(outputFrameEndAccessMilliseconds, 0.0);
|
||||||
mBackendPlayout.completedFrameIndex = completedFrameIndex;
|
mBackendPlayout.completedFrameIndex = completedFrameIndex;
|
||||||
mBackendPlayout.scheduledFrameIndex = scheduledFrameIndex;
|
mBackendPlayout.scheduledFrameIndex = scheduledFrameIndex;
|
||||||
mBackendPlayout.scheduledLeadFrames = scheduledLeadFrames;
|
mBackendPlayout.scheduledLeadFrames = scheduledLeadFrames;
|
||||||
@@ -251,6 +255,7 @@ bool HealthTelemetry::TryRecordBackendPlayoutHealth(const std::string& lifecycle
|
|||||||
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
|
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
|
||||||
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
|
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
|
||||||
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
|
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
|
||||||
|
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
|
||||||
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
|
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
|
||||||
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
|
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
|
||||||
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
|
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
|
||||||
@@ -274,6 +279,9 @@ bool HealthTelemetry::TryRecordBackendPlayoutHealth(const std::string& lifecycle
|
|||||||
mBackendPlayout.outputRenderMilliseconds = std::max(outputRenderMilliseconds, 0.0);
|
mBackendPlayout.outputRenderMilliseconds = std::max(outputRenderMilliseconds, 0.0);
|
||||||
mBackendPlayout.smoothedOutputRenderMilliseconds = std::max(smoothedOutputRenderMilliseconds, 0.0);
|
mBackendPlayout.smoothedOutputRenderMilliseconds = std::max(smoothedOutputRenderMilliseconds, 0.0);
|
||||||
mBackendPlayout.maxOutputRenderMilliseconds = std::max(maxOutputRenderMilliseconds, 0.0);
|
mBackendPlayout.maxOutputRenderMilliseconds = std::max(maxOutputRenderMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputFrameAcquireMilliseconds = std::max(outputFrameAcquireMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputFrameRenderRequestMilliseconds = std::max(outputFrameRenderRequestMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputFrameEndAccessMilliseconds = std::max(outputFrameEndAccessMilliseconds, 0.0);
|
||||||
mBackendPlayout.completedFrameIndex = completedFrameIndex;
|
mBackendPlayout.completedFrameIndex = completedFrameIndex;
|
||||||
mBackendPlayout.scheduledFrameIndex = scheduledFrameIndex;
|
mBackendPlayout.scheduledFrameIndex = scheduledFrameIndex;
|
||||||
mBackendPlayout.scheduledLeadFrames = scheduledLeadFrames;
|
mBackendPlayout.scheduledLeadFrames = scheduledLeadFrames;
|
||||||
@@ -289,6 +297,98 @@ bool HealthTelemetry::TryRecordBackendPlayoutHealth(const std::string& lifecycle
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void HealthTelemetry::RecordOutputRenderQueueWait(double queueWaitMilliseconds)
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mMutex);
|
||||||
|
mBackendPlayout.outputRenderQueueWaitMilliseconds = std::max(queueWaitMilliseconds, 0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HealthTelemetry::TryRecordOutputRenderQueueWait(double queueWaitMilliseconds)
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(mMutex, std::try_to_lock);
|
||||||
|
if (!lock.owns_lock())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
mBackendPlayout.outputRenderQueueWaitMilliseconds = std::max(queueWaitMilliseconds, 0.0);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void HealthTelemetry::RecordOutputRenderPipelineTiming(
|
||||||
|
double drawMilliseconds,
|
||||||
|
double fenceWaitMilliseconds,
|
||||||
|
double mapMilliseconds,
|
||||||
|
double readbackCopyMilliseconds,
|
||||||
|
double cachedCopyMilliseconds,
|
||||||
|
double asyncQueueMilliseconds,
|
||||||
|
double asyncQueueBufferMilliseconds,
|
||||||
|
double asyncQueueSetupMilliseconds,
|
||||||
|
double asyncQueueReadPixelsMilliseconds,
|
||||||
|
double asyncQueueFenceMilliseconds,
|
||||||
|
double syncReadMilliseconds,
|
||||||
|
bool asyncReadbackMissed,
|
||||||
|
bool cachedFallbackUsed,
|
||||||
|
bool syncFallbackUsed)
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mMutex);
|
||||||
|
mBackendPlayout.outputRenderDrawMilliseconds = std::max(drawMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputReadbackFenceWaitMilliseconds = std::max(fenceWaitMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputReadbackMapMilliseconds = std::max(mapMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputReadbackCopyMilliseconds = std::max(readbackCopyMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputCachedCopyMilliseconds = std::max(cachedCopyMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputAsyncQueueMilliseconds = std::max(asyncQueueMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputAsyncQueueBufferMilliseconds = std::max(asyncQueueBufferMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputAsyncQueueSetupMilliseconds = std::max(asyncQueueSetupMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputAsyncQueueReadPixelsMilliseconds = std::max(asyncQueueReadPixelsMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputAsyncQueueFenceMilliseconds = std::max(asyncQueueFenceMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputSyncReadMilliseconds = std::max(syncReadMilliseconds, 0.0);
|
||||||
|
if (asyncReadbackMissed)
|
||||||
|
++mBackendPlayout.outputAsyncReadbackMissCount;
|
||||||
|
if (cachedFallbackUsed)
|
||||||
|
++mBackendPlayout.outputCachedFallbackCount;
|
||||||
|
if (syncFallbackUsed)
|
||||||
|
++mBackendPlayout.outputSyncFallbackCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HealthTelemetry::TryRecordOutputRenderPipelineTiming(
|
||||||
|
double drawMilliseconds,
|
||||||
|
double fenceWaitMilliseconds,
|
||||||
|
double mapMilliseconds,
|
||||||
|
double readbackCopyMilliseconds,
|
||||||
|
double cachedCopyMilliseconds,
|
||||||
|
double asyncQueueMilliseconds,
|
||||||
|
double asyncQueueBufferMilliseconds,
|
||||||
|
double asyncQueueSetupMilliseconds,
|
||||||
|
double asyncQueueReadPixelsMilliseconds,
|
||||||
|
double asyncQueueFenceMilliseconds,
|
||||||
|
double syncReadMilliseconds,
|
||||||
|
bool asyncReadbackMissed,
|
||||||
|
bool cachedFallbackUsed,
|
||||||
|
bool syncFallbackUsed)
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(mMutex, std::try_to_lock);
|
||||||
|
if (!lock.owns_lock())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
mBackendPlayout.outputRenderDrawMilliseconds = std::max(drawMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputReadbackFenceWaitMilliseconds = std::max(fenceWaitMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputReadbackMapMilliseconds = std::max(mapMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputReadbackCopyMilliseconds = std::max(readbackCopyMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputCachedCopyMilliseconds = std::max(cachedCopyMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputAsyncQueueMilliseconds = std::max(asyncQueueMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputAsyncQueueBufferMilliseconds = std::max(asyncQueueBufferMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputAsyncQueueSetupMilliseconds = std::max(asyncQueueSetupMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputAsyncQueueReadPixelsMilliseconds = std::max(asyncQueueReadPixelsMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputAsyncQueueFenceMilliseconds = std::max(asyncQueueFenceMilliseconds, 0.0);
|
||||||
|
mBackendPlayout.outputSyncReadMilliseconds = std::max(syncReadMilliseconds, 0.0);
|
||||||
|
if (asyncReadbackMissed)
|
||||||
|
++mBackendPlayout.outputAsyncReadbackMissCount;
|
||||||
|
if (cachedFallbackUsed)
|
||||||
|
++mBackendPlayout.outputCachedFallbackCount;
|
||||||
|
if (syncFallbackUsed)
|
||||||
|
++mBackendPlayout.outputSyncFallbackCount;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
HealthTelemetry::SignalStatusSnapshot HealthTelemetry::GetSignalStatusSnapshot() const
|
HealthTelemetry::SignalStatusSnapshot HealthTelemetry::GetSignalStatusSnapshot() const
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> lock(mMutex);
|
std::lock_guard<std::mutex> lock(mMutex);
|
||||||
|
|||||||
@@ -97,6 +97,24 @@ public:
|
|||||||
double outputRenderMilliseconds = 0.0;
|
double outputRenderMilliseconds = 0.0;
|
||||||
double smoothedOutputRenderMilliseconds = 0.0;
|
double smoothedOutputRenderMilliseconds = 0.0;
|
||||||
double maxOutputRenderMilliseconds = 0.0;
|
double maxOutputRenderMilliseconds = 0.0;
|
||||||
|
double outputFrameAcquireMilliseconds = 0.0;
|
||||||
|
double outputFrameRenderRequestMilliseconds = 0.0;
|
||||||
|
double outputFrameEndAccessMilliseconds = 0.0;
|
||||||
|
double outputRenderQueueWaitMilliseconds = 0.0;
|
||||||
|
double outputRenderDrawMilliseconds = 0.0;
|
||||||
|
double outputReadbackFenceWaitMilliseconds = 0.0;
|
||||||
|
double outputReadbackMapMilliseconds = 0.0;
|
||||||
|
double outputReadbackCopyMilliseconds = 0.0;
|
||||||
|
double outputCachedCopyMilliseconds = 0.0;
|
||||||
|
double outputAsyncQueueMilliseconds = 0.0;
|
||||||
|
double outputAsyncQueueBufferMilliseconds = 0.0;
|
||||||
|
double outputAsyncQueueSetupMilliseconds = 0.0;
|
||||||
|
double outputAsyncQueueReadPixelsMilliseconds = 0.0;
|
||||||
|
double outputAsyncQueueFenceMilliseconds = 0.0;
|
||||||
|
double outputSyncReadMilliseconds = 0.0;
|
||||||
|
uint64_t outputAsyncReadbackMissCount = 0;
|
||||||
|
uint64_t outputCachedFallbackCount = 0;
|
||||||
|
uint64_t outputSyncFallbackCount = 0;
|
||||||
uint64_t completedFrameIndex = 0;
|
uint64_t completedFrameIndex = 0;
|
||||||
uint64_t scheduledFrameIndex = 0;
|
uint64_t scheduledFrameIndex = 0;
|
||||||
uint64_t scheduledLeadFrames = 0;
|
uint64_t scheduledLeadFrames = 0;
|
||||||
@@ -161,6 +179,7 @@ public:
|
|||||||
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
|
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
|
||||||
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
|
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
|
||||||
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
|
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
|
||||||
|
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
|
||||||
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
|
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
|
||||||
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
|
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
|
||||||
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
|
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
|
||||||
@@ -170,11 +189,46 @@ public:
|
|||||||
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
|
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
|
||||||
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
|
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
|
||||||
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
|
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
|
||||||
|
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
|
||||||
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
|
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
|
||||||
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
|
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
|
||||||
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
|
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
|
||||||
bool degraded, const std::string& statusMessage);
|
bool degraded, const std::string& statusMessage);
|
||||||
|
|
||||||
|
// Stores the latest output render queue wait (milliseconds) in the backend
// playout telemetry. Waits for the telemetry mutex; negative values are stored as 0.
void RecordOutputRenderQueueWait(double queueWaitMilliseconds);
|
||||||
|
// Like RecordOutputRenderQueueWait, but never waits for the telemetry mutex.
// Returns false, without recording anything, when the mutex is currently held.
bool TryRecordOutputRenderQueueWait(double queueWaitMilliseconds);
|
||||||
|
|
||||||
|
// Records one sample of output render pipeline timings, waiting for the
// telemetry mutex. All *Milliseconds values overwrite the previous sample and
// are stored as 0 when negative. Each boolean that is true adds one to its
// cumulative counter (async readback miss, cached fallback, sync fallback).
void RecordOutputRenderPipelineTiming(
    double drawMilliseconds, double fenceWaitMilliseconds, double mapMilliseconds,
    double readbackCopyMilliseconds, double cachedCopyMilliseconds,
    double asyncQueueMilliseconds, double asyncQueueBufferMilliseconds,
    double asyncQueueSetupMilliseconds, double asyncQueueReadPixelsMilliseconds,
    double asyncQueueFenceMilliseconds, double syncReadMilliseconds,
    bool asyncReadbackMissed, bool cachedFallbackUsed, bool syncFallbackUsed);
|
||||||
|
// Non-blocking variant of RecordOutputRenderPipelineTiming. Returns true when
// the sample was stored; returns false and changes nothing (timings and
// counters alike) when the telemetry mutex is currently held.
bool TryRecordOutputRenderPipelineTiming(
    double drawMilliseconds, double fenceWaitMilliseconds, double mapMilliseconds,
    double readbackCopyMilliseconds, double cachedCopyMilliseconds,
    double asyncQueueMilliseconds, double asyncQueueBufferMilliseconds,
    double asyncQueueSetupMilliseconds, double asyncQueueReadPixelsMilliseconds,
    double asyncQueueFenceMilliseconds, double syncReadMilliseconds,
    bool asyncReadbackMissed, bool cachedFallbackUsed, bool syncFallbackUsed);
|
||||||
|
|
||||||
SignalStatusSnapshot GetSignalStatusSnapshot() const;
|
SignalStatusSnapshot GetSignalStatusSnapshot() const;
|
||||||
VideoIOStatusSnapshot GetVideoIOStatusSnapshot() const;
|
VideoIOStatusSnapshot GetVideoIOStatusSnapshot() const;
|
||||||
PerformanceSnapshot GetPerformanceSnapshot() const;
|
PerformanceSnapshot GetPerformanceSnapshot() const;
|
||||||
|
|||||||
@@ -9,12 +9,14 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <cmath>
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
|
|
||||||
VideoBackend::VideoBackend(RenderEngine& renderEngine, HealthTelemetry& healthTelemetry, RuntimeEventDispatcher& runtimeEventDispatcher) :
|
VideoBackend::VideoBackend(RenderEngine& renderEngine, HealthTelemetry& healthTelemetry, RuntimeEventDispatcher& runtimeEventDispatcher) :
|
||||||
mHealthTelemetry(healthTelemetry),
|
mHealthTelemetry(healthTelemetry),
|
||||||
mRuntimeEventDispatcher(runtimeEventDispatcher),
|
mRuntimeEventDispatcher(runtimeEventDispatcher),
|
||||||
mPlayoutPolicy(NormalizeVideoPlayoutPolicy(VideoPlayoutPolicy())),
|
mPlayoutPolicy(NormalizeVideoPlayoutPolicy(VideoPlayoutPolicy())),
|
||||||
|
mOutputProductionController(mPlayoutPolicy),
|
||||||
mReadyOutputQueue(mPlayoutPolicy),
|
mReadyOutputQueue(mPlayoutPolicy),
|
||||||
mVideoIODevice(std::make_unique<DeckLinkSession>()),
|
mVideoIODevice(std::make_unique<DeckLinkSession>()),
|
||||||
mBridge(std::make_unique<OpenGLVideoIOBridge>(renderEngine))
|
mBridge(std::make_unique<OpenGLVideoIOBridge>(renderEngine))
|
||||||
@@ -80,6 +82,7 @@ bool VideoBackend::ConfigureInput(const VideoFormat& inputVideoMode, std::string
|
|||||||
bool VideoBackend::ConfigureOutput(const VideoFormat& outputVideoMode, bool externalKeyingEnabled, std::string& error)
|
bool VideoBackend::ConfigureOutput(const VideoFormat& outputVideoMode, bool externalKeyingEnabled, std::string& error)
|
||||||
{
|
{
|
||||||
mPlayoutPolicy = NormalizeVideoPlayoutPolicy(mPlayoutPolicy);
|
mPlayoutPolicy = NormalizeVideoPlayoutPolicy(mPlayoutPolicy);
|
||||||
|
mOutputProductionController.Configure(mPlayoutPolicy);
|
||||||
mReadyOutputQueue.Configure(mPlayoutPolicy);
|
mReadyOutputQueue.Configure(mPlayoutPolicy);
|
||||||
if (mLifecycle.State() != VideoBackendLifecycleState::Configuring)
|
if (mLifecycle.State() != VideoBackendLifecycleState::Configuring)
|
||||||
ApplyLifecycleTransition(VideoBackendLifecycleState::Configuring, "Configuring video backend output.");
|
ApplyLifecycleTransition(VideoBackendLifecycleState::Configuring, "Configuring video backend output.");
|
||||||
@@ -101,7 +104,10 @@ bool VideoBackend::Start()
|
|||||||
StartOutputCompletionWorker();
|
StartOutputCompletionWorker();
|
||||||
const bool started = mVideoIODevice->Start();
|
const bool started = mVideoIODevice->Start();
|
||||||
if (started)
|
if (started)
|
||||||
|
{
|
||||||
|
StartOutputProducerWorker();
|
||||||
ApplyLifecycleTransition(VideoBackendLifecycleState::Running, "Video backend started.");
|
ApplyLifecycleTransition(VideoBackendLifecycleState::Running, "Video backend started.");
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
StopOutputCompletionWorker();
|
StopOutputCompletionWorker();
|
||||||
@@ -113,6 +119,7 @@ bool VideoBackend::Start()
|
|||||||
bool VideoBackend::Stop()
|
bool VideoBackend::Stop()
|
||||||
{
|
{
|
||||||
ApplyLifecycleTransition(VideoBackendLifecycleState::Stopping, "Video backend stopping.");
|
ApplyLifecycleTransition(VideoBackendLifecycleState::Stopping, "Video backend stopping.");
|
||||||
|
StopOutputProducerWorker();
|
||||||
const bool stopped = mVideoIODevice->Stop();
|
const bool stopped = mVideoIODevice->Stop();
|
||||||
StopOutputCompletionWorker();
|
StopOutputCompletionWorker();
|
||||||
if (stopped)
|
if (stopped)
|
||||||
@@ -232,6 +239,12 @@ const std::string& VideoBackend::StatusMessage() const
|
|||||||
return mVideoIODevice->StatusMessage();
|
return mVideoIODevice->StatusMessage();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool VideoBackend::ShouldPrioritizeOutputOverPreview() const
|
||||||
|
{
|
||||||
|
const RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
|
||||||
|
return metrics.depth < static_cast<std::size_t>(mPlayoutPolicy.targetReadyFrames);
|
||||||
|
}
|
||||||
|
|
||||||
void VideoBackend::SetStatusMessage(const std::string& message)
|
void VideoBackend::SetStatusMessage(const std::string& message)
|
||||||
{
|
{
|
||||||
mVideoIODevice->SetStatusMessage(message);
|
mVideoIODevice->SetStatusMessage(message);
|
||||||
@@ -287,6 +300,7 @@ void VideoBackend::HandleOutputFrameCompletion(const VideoIOCompletion& completi
|
|||||||
}
|
}
|
||||||
|
|
||||||
void VideoBackend::StartOutputCompletionWorker()
|
void VideoBackend::StartOutputCompletionWorker()
|
||||||
|
{
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> lock(mOutputCompletionMutex);
|
std::lock_guard<std::mutex> lock(mOutputCompletionMutex);
|
||||||
if (mOutputCompletionWorkerRunning)
|
if (mOutputCompletionWorkerRunning)
|
||||||
@@ -302,13 +316,21 @@ void VideoBackend::StartOutputCompletionWorker()
|
|||||||
mOutputRenderMilliseconds = 0.0;
|
mOutputRenderMilliseconds = 0.0;
|
||||||
mSmoothedOutputRenderMilliseconds = 0.0;
|
mSmoothedOutputRenderMilliseconds = 0.0;
|
||||||
mMaxOutputRenderMilliseconds = 0.0;
|
mMaxOutputRenderMilliseconds = 0.0;
|
||||||
|
mOutputFrameAcquireMilliseconds = 0.0;
|
||||||
|
mOutputFrameRenderRequestMilliseconds = 0.0;
|
||||||
|
mOutputFrameEndAccessMilliseconds = 0.0;
|
||||||
|
mLastLateStreak = 0;
|
||||||
|
mLastDropStreak = 0;
|
||||||
mOutputCompletionWorkerStopping = false;
|
mOutputCompletionWorkerStopping = false;
|
||||||
mOutputCompletionWorkerRunning = true;
|
mOutputCompletionWorkerRunning = true;
|
||||||
mOutputCompletionWorker = std::thread(&VideoBackend::OutputCompletionWorkerMain, this);
|
mOutputCompletionWorker = std::thread(&VideoBackend::OutputCompletionWorkerMain, this);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void VideoBackend::StopOutputCompletionWorker()
|
void VideoBackend::StopOutputCompletionWorker()
|
||||||
{
|
{
|
||||||
|
StopOutputProducerWorker();
|
||||||
|
|
||||||
bool shouldJoin = false;
|
bool shouldJoin = false;
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> lock(mOutputCompletionMutex);
|
std::lock_guard<std::mutex> lock(mOutputCompletionMutex);
|
||||||
@@ -322,6 +344,40 @@ void VideoBackend::StopOutputCompletionWorker()
|
|||||||
mOutputCompletionWorker.join();
|
mOutputCompletionWorker.join();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VideoBackend::StartOutputProducerWorker()
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mOutputProducerMutex);
|
||||||
|
if (mOutputProducerWorkerRunning)
|
||||||
|
return;
|
||||||
|
|
||||||
|
mLastOutputProductionCompletion = VideoIOCompletion();
|
||||||
|
mLastOutputProductionTime = std::chrono::steady_clock::time_point();
|
||||||
|
mOutputProducerWorkerStopping = false;
|
||||||
|
mOutputProducerWorkerRunning = true;
|
||||||
|
mOutputProducerWorker = std::thread(&VideoBackend::OutputProducerWorkerMain, this);
|
||||||
|
mOutputProducerCondition.notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
|
void VideoBackend::StopOutputProducerWorker()
|
||||||
|
{
|
||||||
|
bool shouldJoin = false;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mOutputProducerMutex);
|
||||||
|
if (mOutputProducerWorkerRunning)
|
||||||
|
mOutputProducerWorkerStopping = true;
|
||||||
|
shouldJoin = mOutputProducerWorker.joinable();
|
||||||
|
}
|
||||||
|
mOutputProducerCondition.notify_one();
|
||||||
|
|
||||||
|
if (shouldJoin)
|
||||||
|
mOutputProducerWorker.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
void VideoBackend::NotifyOutputProducer()
|
||||||
|
{
|
||||||
|
mOutputProducerCondition.notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
void VideoBackend::OutputCompletionWorkerMain()
|
void VideoBackend::OutputCompletionWorkerMain()
|
||||||
{
|
{
|
||||||
for (;;)
|
for (;;)
|
||||||
@@ -351,6 +407,57 @@ void VideoBackend::OutputCompletionWorkerMain()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VideoBackend::OutputProducerWorkerMain()
|
||||||
|
{
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(mOutputProducerMutex);
|
||||||
|
mOutputProducerCondition.wait_for(lock, OutputProducerWakeInterval());
|
||||||
|
|
||||||
|
if (mOutputProducerWorkerStopping)
|
||||||
|
{
|
||||||
|
mOutputProducerWorkerRunning = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
|
||||||
|
RecordReadyQueueDepthSample(metrics);
|
||||||
|
const OutputProductionDecision decision = mOutputProductionController.Decide(BuildOutputProductionPressure(metrics));
|
||||||
|
if (decision.action != OutputProductionAction::Produce || decision.requestedFrames == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
VideoIOCompletion completion;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mOutputProducerMutex);
|
||||||
|
if (mOutputProducerWorkerStopping)
|
||||||
|
continue;
|
||||||
|
completion = mLastOutputProductionCompletion;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto now = std::chrono::steady_clock::now();
|
||||||
|
if (mLastOutputProductionTime != std::chrono::steady_clock::time_point() &&
|
||||||
|
now - mLastOutputProductionTime < OutputProducerWakeInterval())
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ProduceReadyOutputFrames(completion, 1) > 0)
|
||||||
|
mLastOutputProductionTime = std::chrono::steady_clock::now();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::chrono::milliseconds VideoBackend::OutputProducerWakeInterval() const
|
||||||
|
{
|
||||||
|
const double frameBudgetMilliseconds = State().frameBudgetMilliseconds;
|
||||||
|
if (frameBudgetMilliseconds <= 0.0)
|
||||||
|
return std::chrono::milliseconds(8);
|
||||||
|
|
||||||
|
const int intervalMilliseconds = (std::max)(1, static_cast<int>(std::floor(frameBudgetMilliseconds * 0.75)));
|
||||||
|
return std::chrono::milliseconds(intervalMilliseconds);
|
||||||
|
}
|
||||||
|
|
||||||
void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& completion)
|
void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& completion)
|
||||||
{
|
{
|
||||||
RecordFramePacing(completion.result);
|
RecordFramePacing(completion.result);
|
||||||
@@ -358,31 +465,69 @@ void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& complet
|
|||||||
const RenderOutputQueueMetrics initialQueueMetrics = mReadyOutputQueue.GetMetrics();
|
const RenderOutputQueueMetrics initialQueueMetrics = mReadyOutputQueue.GetMetrics();
|
||||||
RecordReadyQueueDepthSample(initialQueueMetrics);
|
RecordReadyQueueDepthSample(initialQueueMetrics);
|
||||||
const VideoPlayoutRecoveryDecision recoveryDecision = AccountForCompletionResult(completion.result, initialQueueMetrics.depth);
|
const VideoPlayoutRecoveryDecision recoveryDecision = AccountForCompletionResult(completion.result, initialQueueMetrics.depth);
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
|
||||||
|
mLastLateStreak = recoveryDecision.lateStreak;
|
||||||
|
mLastDropStreak = recoveryDecision.dropStreak;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mOutputProducerMutex);
|
||||||
|
mLastOutputProductionCompletion = completion;
|
||||||
|
}
|
||||||
|
NotifyOutputProducer();
|
||||||
|
|
||||||
FillReadyOutputQueue(completion);
|
if (!ScheduleReadyOutputFrame() &&
|
||||||
if (!ScheduleReadyOutputFrame())
|
(ProduceReadyOutputFrames(completion, 1) == 0 || !ScheduleReadyOutputFrame()))
|
||||||
|
{
|
||||||
ScheduleBlackUnderrunFrame();
|
ScheduleBlackUnderrunFrame();
|
||||||
|
}
|
||||||
|
NotifyOutputProducer();
|
||||||
RecordBackendPlayoutHealth(completion.result, recoveryDecision);
|
RecordBackendPlayoutHealth(completion.result, recoveryDecision);
|
||||||
}
|
}
|
||||||
|
|
||||||
void VideoBackend::RecordBackendPlayoutHealth(VideoIOCompletionResult result, const VideoPlayoutRecoveryDecision& recoveryDecision)
|
void VideoBackend::RecordBackendPlayoutHealth(VideoIOCompletionResult result, const VideoPlayoutRecoveryDecision& recoveryDecision)
|
||||||
{
|
{
|
||||||
const RenderOutputQueueMetrics queueMetrics = mReadyOutputQueue.GetMetrics();
|
const RenderOutputQueueMetrics queueMetrics = mReadyOutputQueue.GetMetrics();
|
||||||
|
std::size_t minReadyQueueDepth = 0;
|
||||||
|
std::size_t maxReadyQueueDepth = 0;
|
||||||
|
uint64_t readyQueueZeroDepthCount = 0;
|
||||||
|
double outputRenderMilliseconds = 0.0;
|
||||||
|
double smoothedOutputRenderMilliseconds = 0.0;
|
||||||
|
double maxOutputRenderMilliseconds = 0.0;
|
||||||
|
double outputFrameAcquireMilliseconds = 0.0;
|
||||||
|
double outputFrameRenderRequestMilliseconds = 0.0;
|
||||||
|
double outputFrameEndAccessMilliseconds = 0.0;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
|
||||||
|
minReadyQueueDepth = mMinReadyQueueDepth;
|
||||||
|
maxReadyQueueDepth = mMaxReadyQueueDepth;
|
||||||
|
readyQueueZeroDepthCount = mReadyQueueZeroDepthCount;
|
||||||
|
outputRenderMilliseconds = mOutputRenderMilliseconds;
|
||||||
|
smoothedOutputRenderMilliseconds = mSmoothedOutputRenderMilliseconds;
|
||||||
|
maxOutputRenderMilliseconds = mMaxOutputRenderMilliseconds;
|
||||||
|
outputFrameAcquireMilliseconds = mOutputFrameAcquireMilliseconds;
|
||||||
|
outputFrameRenderRequestMilliseconds = mOutputFrameRenderRequestMilliseconds;
|
||||||
|
outputFrameEndAccessMilliseconds = mOutputFrameEndAccessMilliseconds;
|
||||||
|
}
|
||||||
|
|
||||||
mHealthTelemetry.TryRecordBackendPlayoutHealth(
|
mHealthTelemetry.TryRecordBackendPlayoutHealth(
|
||||||
VideoBackendLifecycle::StateName(mLifecycle.State()),
|
VideoBackendLifecycle::StateName(mLifecycle.State()),
|
||||||
CompletionResultName(result),
|
CompletionResultName(result),
|
||||||
queueMetrics.depth,
|
queueMetrics.depth,
|
||||||
queueMetrics.capacity,
|
queueMetrics.capacity,
|
||||||
queueMetrics.pushedCount,
|
queueMetrics.pushedCount,
|
||||||
mMinReadyQueueDepth,
|
minReadyQueueDepth,
|
||||||
mMaxReadyQueueDepth,
|
maxReadyQueueDepth,
|
||||||
mReadyQueueZeroDepthCount,
|
readyQueueZeroDepthCount,
|
||||||
queueMetrics.poppedCount,
|
queueMetrics.poppedCount,
|
||||||
queueMetrics.droppedCount,
|
queueMetrics.droppedCount,
|
||||||
queueMetrics.underrunCount,
|
queueMetrics.underrunCount,
|
||||||
mOutputRenderMilliseconds,
|
outputRenderMilliseconds,
|
||||||
mSmoothedOutputRenderMilliseconds,
|
smoothedOutputRenderMilliseconds,
|
||||||
mMaxOutputRenderMilliseconds,
|
maxOutputRenderMilliseconds,
|
||||||
|
outputFrameAcquireMilliseconds,
|
||||||
|
outputFrameRenderRequestMilliseconds,
|
||||||
|
outputFrameEndAccessMilliseconds,
|
||||||
recoveryDecision.completedFrameIndex,
|
recoveryDecision.completedFrameIndex,
|
||||||
recoveryDecision.scheduledFrameIndex,
|
recoveryDecision.scheduledFrameIndex,
|
||||||
recoveryDecision.scheduledLeadFrames,
|
recoveryDecision.scheduledLeadFrames,
|
||||||
@@ -397,50 +542,93 @@ void VideoBackend::RecordBackendPlayoutHealth(VideoIOCompletionResult result, co
|
|||||||
StatusMessage());
|
StatusMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VideoBackend::FillReadyOutputQueue(const VideoIOCompletion& completion)
|
std::size_t VideoBackend::ProduceReadyOutputFrames(const VideoIOCompletion& completion, std::size_t maxFrames)
|
||||||
{
|
{
|
||||||
|
if (maxFrames == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> productionLock(mOutputProductionMutex);
|
||||||
RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
|
RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
|
||||||
bool filledAny = false;
|
std::size_t producedFrames = 0;
|
||||||
while (metrics.depth < mPlayoutPolicy.targetReadyFrames)
|
while (producedFrames < maxFrames)
|
||||||
{
|
{
|
||||||
|
const OutputProductionDecision decision = mOutputProductionController.Decide(BuildOutputProductionPressure(metrics));
|
||||||
|
if (decision.action != OutputProductionAction::Produce)
|
||||||
|
break;
|
||||||
|
|
||||||
if (!RenderReadyOutputFrame(mVideoIODevice->State(), completion))
|
if (!RenderReadyOutputFrame(mVideoIODevice->State(), completion))
|
||||||
return filledAny;
|
break;
|
||||||
filledAny = true;
|
++producedFrames;
|
||||||
metrics = mReadyOutputQueue.GetMetrics();
|
metrics = mReadyOutputQueue.GetMetrics();
|
||||||
RecordReadyQueueDepthSample(metrics);
|
RecordReadyQueueDepthSample(metrics);
|
||||||
}
|
}
|
||||||
return true;
|
return producedFrames;
|
||||||
|
}
|
||||||
|
|
||||||
|
OutputProductionPressure VideoBackend::BuildOutputProductionPressure(const RenderOutputQueueMetrics& metrics) const
|
||||||
|
{
|
||||||
|
OutputProductionPressure pressure;
|
||||||
|
pressure.readyQueueDepth = metrics.depth;
|
||||||
|
pressure.readyQueueCapacity = metrics.capacity;
|
||||||
|
pressure.readyQueueUnderrunCount = metrics.underrunCount;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
|
||||||
|
pressure.lateStreak = mLastLateStreak;
|
||||||
|
pressure.dropStreak = mLastDropStreak;
|
||||||
|
}
|
||||||
|
return pressure;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VideoBackend::RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion)
|
bool VideoBackend::RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion)
|
||||||
{
|
{
|
||||||
const auto renderStart = std::chrono::steady_clock::now();
|
const auto renderStart = std::chrono::steady_clock::now();
|
||||||
VideoIOOutputFrame outputFrame;
|
VideoIOOutputFrame outputFrame;
|
||||||
|
const auto acquireStart = std::chrono::steady_clock::now();
|
||||||
if (!BeginOutputFrame(outputFrame))
|
if (!BeginOutputFrame(outputFrame))
|
||||||
return false;
|
return false;
|
||||||
|
const auto acquireEnd = std::chrono::steady_clock::now();
|
||||||
|
|
||||||
bool rendered = true;
|
bool rendered = true;
|
||||||
|
const auto renderRequestStart = std::chrono::steady_clock::now();
|
||||||
if (mBridge)
|
if (mBridge)
|
||||||
rendered = mBridge->RenderScheduledFrame(state, completion, outputFrame);
|
rendered = mBridge->RenderScheduledFrame(state, completion, outputFrame);
|
||||||
|
const auto renderRequestEnd = std::chrono::steady_clock::now();
|
||||||
|
|
||||||
|
const auto endAccessStart = std::chrono::steady_clock::now();
|
||||||
EndOutputFrame(outputFrame);
|
EndOutputFrame(outputFrame);
|
||||||
|
const auto endAccessEnd = std::chrono::steady_clock::now();
|
||||||
|
const double acquireMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(acquireEnd - acquireStart).count();
|
||||||
|
const double renderRequestMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(renderRequestEnd - renderRequestStart).count();
|
||||||
|
const double endAccessMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(endAccessEnd - endAccessStart).count();
|
||||||
|
|
||||||
if (!rendered)
|
if (!rendered)
|
||||||
{
|
{
|
||||||
ApplyLifecycleTransition(VideoBackendLifecycleState::Degraded, "Output frame render request failed; skipping schedule for this frame.");
|
ApplyLifecycleTransition(VideoBackendLifecycleState::Degraded, "Output frame render request failed; skipping schedule for this frame.");
|
||||||
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
|
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
|
||||||
std::chrono::steady_clock::now() - renderStart).count();
|
std::chrono::steady_clock::now() - renderStart).count();
|
||||||
RecordOutputRenderDuration(renderMilliseconds);
|
RecordOutputRenderDuration(renderMilliseconds, acquireMilliseconds, renderRequestMilliseconds, endAccessMilliseconds);
|
||||||
|
if (outputFrame.nativeFrame != nullptr)
|
||||||
|
{
|
||||||
|
static_cast<IUnknown*>(outputFrame.nativeFrame)->Release();
|
||||||
|
outputFrame.nativeFrame = nullptr;
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
|
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
|
||||||
std::chrono::steady_clock::now() - renderStart).count();
|
std::chrono::steady_clock::now() - renderStart).count();
|
||||||
RecordOutputRenderDuration(renderMilliseconds);
|
RecordOutputRenderDuration(renderMilliseconds, acquireMilliseconds, renderRequestMilliseconds, endAccessMilliseconds);
|
||||||
|
|
||||||
RenderOutputFrame readyFrame;
|
RenderOutputFrame readyFrame;
|
||||||
readyFrame.frame = outputFrame;
|
readyFrame.frame = outputFrame;
|
||||||
readyFrame.frameIndex = ++mNextReadyOutputFrameIndex;
|
readyFrame.frameIndex = ++mNextReadyOutputFrameIndex;
|
||||||
return mReadyOutputQueue.Push(readyFrame);
|
const bool pushed = mReadyOutputQueue.Push(readyFrame);
|
||||||
|
if (!pushed && outputFrame.nativeFrame != nullptr)
|
||||||
|
{
|
||||||
|
static_cast<IUnknown*>(outputFrame.nativeFrame)->Release();
|
||||||
|
outputFrame.nativeFrame = nullptr;
|
||||||
|
}
|
||||||
|
return pushed;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VideoBackend::ScheduleReadyOutputFrame()
|
bool VideoBackend::ScheduleReadyOutputFrame()
|
||||||
@@ -516,6 +704,7 @@ void VideoBackend::RecordFramePacing(VideoIOCompletionResult completionResult)
|
|||||||
|
|
||||||
void VideoBackend::RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics)
|
void VideoBackend::RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics)
|
||||||
{
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
|
||||||
if (!mHasReadyQueueDepthBaseline)
|
if (!mHasReadyQueueDepthBaseline)
|
||||||
{
|
{
|
||||||
mHasReadyQueueDepthBaseline = true;
|
mHasReadyQueueDepthBaseline = true;
|
||||||
@@ -532,14 +721,18 @@ void VideoBackend::RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& m
|
|||||||
++mReadyQueueZeroDepthCount;
|
++mReadyQueueZeroDepthCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VideoBackend::RecordOutputRenderDuration(double renderMilliseconds)
|
void VideoBackend::RecordOutputRenderDuration(double renderMilliseconds, double acquireMilliseconds, double renderRequestMilliseconds, double endAccessMilliseconds)
|
||||||
{
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
|
||||||
mOutputRenderMilliseconds = (std::max)(renderMilliseconds, 0.0);
|
mOutputRenderMilliseconds = (std::max)(renderMilliseconds, 0.0);
|
||||||
if (mSmoothedOutputRenderMilliseconds <= 0.0)
|
if (mSmoothedOutputRenderMilliseconds <= 0.0)
|
||||||
mSmoothedOutputRenderMilliseconds = mOutputRenderMilliseconds;
|
mSmoothedOutputRenderMilliseconds = mOutputRenderMilliseconds;
|
||||||
else
|
else
|
||||||
mSmoothedOutputRenderMilliseconds = mSmoothedOutputRenderMilliseconds * 0.9 + mOutputRenderMilliseconds * 0.1;
|
mSmoothedOutputRenderMilliseconds = mSmoothedOutputRenderMilliseconds * 0.9 + mOutputRenderMilliseconds * 0.1;
|
||||||
mMaxOutputRenderMilliseconds = (std::max)(mMaxOutputRenderMilliseconds, mOutputRenderMilliseconds);
|
mMaxOutputRenderMilliseconds = (std::max)(mMaxOutputRenderMilliseconds, mOutputRenderMilliseconds);
|
||||||
|
mOutputFrameAcquireMilliseconds = (std::max)(acquireMilliseconds, 0.0);
|
||||||
|
mOutputFrameRenderRequestMilliseconds = (std::max)(renderRequestMilliseconds, 0.0);
|
||||||
|
mOutputFrameEndAccessMilliseconds = (std::max)(endAccessMilliseconds, 0.0);
|
||||||
|
|
||||||
PublishTimingSample("VideoBackend", "outputRender", mOutputRenderMilliseconds, "ms");
|
PublishTimingSample("VideoBackend", "outputRender", mOutputRenderMilliseconds, "ms");
|
||||||
PublishTimingSample("VideoBackend", "smoothedOutputRender", mSmoothedOutputRenderMilliseconds, "ms");
|
PublishTimingSample("VideoBackend", "smoothedOutputRender", mSmoothedOutputRenderMilliseconds, "ms");
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "OutputProductionController.h"
|
||||||
#include "RenderOutputQueue.h"
|
#include "RenderOutputQueue.h"
|
||||||
#include "VideoBackendLifecycle.h"
|
#include "VideoBackendLifecycle.h"
|
||||||
#include "VideoIOTypes.h"
|
#include "VideoIOTypes.h"
|
||||||
@@ -59,6 +60,7 @@ public:
|
|||||||
bool KeyerInterfaceAvailable() const;
|
bool KeyerInterfaceAvailable() const;
|
||||||
bool ExternalKeyingActive() const;
|
bool ExternalKeyingActive() const;
|
||||||
const std::string& StatusMessage() const;
|
const std::string& StatusMessage() const;
|
||||||
|
bool ShouldPrioritizeOutputOverPreview() const;
|
||||||
void SetStatusMessage(const std::string& message);
|
void SetStatusMessage(const std::string& message);
|
||||||
void PublishStatus(bool externalKeyingConfigured, const std::string& statusMessage = std::string());
|
void PublishStatus(bool externalKeyingConfigured, const std::string& statusMessage = std::string());
|
||||||
void ReportNoInputDeviceSignalStatus();
|
void ReportNoInputDeviceSignalStatus();
|
||||||
@@ -69,14 +71,20 @@ private:
|
|||||||
void StartOutputCompletionWorker();
|
void StartOutputCompletionWorker();
|
||||||
void StopOutputCompletionWorker();
|
void StopOutputCompletionWorker();
|
||||||
void OutputCompletionWorkerMain();
|
void OutputCompletionWorkerMain();
|
||||||
|
void StartOutputProducerWorker();
|
||||||
|
void StopOutputProducerWorker();
|
||||||
|
void OutputProducerWorkerMain();
|
||||||
|
void NotifyOutputProducer();
|
||||||
|
std::chrono::milliseconds OutputProducerWakeInterval() const;
|
||||||
void ProcessOutputFrameCompletion(const VideoIOCompletion& completion);
|
void ProcessOutputFrameCompletion(const VideoIOCompletion& completion);
|
||||||
bool FillReadyOutputQueue(const VideoIOCompletion& completion);
|
std::size_t ProduceReadyOutputFrames(const VideoIOCompletion& completion, std::size_t maxFrames);
|
||||||
|
OutputProductionPressure BuildOutputProductionPressure(const RenderOutputQueueMetrics& metrics) const;
|
||||||
bool RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion);
|
bool RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion);
|
||||||
bool ScheduleReadyOutputFrame();
|
bool ScheduleReadyOutputFrame();
|
||||||
bool ScheduleBlackUnderrunFrame();
|
bool ScheduleBlackUnderrunFrame();
|
||||||
void RecordFramePacing(VideoIOCompletionResult completionResult);
|
void RecordFramePacing(VideoIOCompletionResult completionResult);
|
||||||
void RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics);
|
void RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics);
|
||||||
void RecordOutputRenderDuration(double renderMilliseconds);
|
void RecordOutputRenderDuration(double renderMilliseconds, double acquireMilliseconds, double renderRequestMilliseconds, double endAccessMilliseconds);
|
||||||
bool ApplyLifecycleTransition(VideoBackendLifecycleState state, const std::string& message);
|
bool ApplyLifecycleTransition(VideoBackendLifecycleState state, const std::string& message);
|
||||||
bool ApplyLifecycleFailure(const std::string& message);
|
bool ApplyLifecycleFailure(const std::string& message);
|
||||||
void PublishBackendStateChanged(const std::string& state, const std::string& message);
|
void PublishBackendStateChanged(const std::string& state, const std::string& message);
|
||||||
@@ -92,6 +100,7 @@ private:
|
|||||||
RuntimeEventDispatcher& mRuntimeEventDispatcher;
|
RuntimeEventDispatcher& mRuntimeEventDispatcher;
|
||||||
VideoBackendLifecycle mLifecycle;
|
VideoBackendLifecycle mLifecycle;
|
||||||
VideoPlayoutPolicy mPlayoutPolicy;
|
VideoPlayoutPolicy mPlayoutPolicy;
|
||||||
|
OutputProductionController mOutputProductionController;
|
||||||
RenderOutputQueue mReadyOutputQueue;
|
RenderOutputQueue mReadyOutputQueue;
|
||||||
std::unique_ptr<VideoIODevice> mVideoIODevice;
|
std::unique_ptr<VideoIODevice> mVideoIODevice;
|
||||||
std::unique_ptr<OpenGLVideoIOBridge> mBridge;
|
std::unique_ptr<OpenGLVideoIOBridge> mBridge;
|
||||||
@@ -99,8 +108,17 @@ private:
|
|||||||
std::condition_variable mOutputCompletionCondition;
|
std::condition_variable mOutputCompletionCondition;
|
||||||
std::deque<VideoIOCompletion> mPendingOutputCompletions;
|
std::deque<VideoIOCompletion> mPendingOutputCompletions;
|
||||||
std::thread mOutputCompletionWorker;
|
std::thread mOutputCompletionWorker;
|
||||||
|
std::mutex mOutputProducerMutex;
|
||||||
|
std::condition_variable mOutputProducerCondition;
|
||||||
|
std::thread mOutputProducerWorker;
|
||||||
|
VideoIOCompletion mLastOutputProductionCompletion;
|
||||||
|
std::chrono::steady_clock::time_point mLastOutputProductionTime;
|
||||||
|
std::mutex mOutputProductionMutex;
|
||||||
|
mutable std::mutex mOutputMetricsMutex;
|
||||||
bool mOutputCompletionWorkerRunning = false;
|
bool mOutputCompletionWorkerRunning = false;
|
||||||
bool mOutputCompletionWorkerStopping = false;
|
bool mOutputCompletionWorkerStopping = false;
|
||||||
|
bool mOutputProducerWorkerRunning = false;
|
||||||
|
bool mOutputProducerWorkerStopping = false;
|
||||||
uint64_t mNextReadyOutputFrameIndex = 0;
|
uint64_t mNextReadyOutputFrameIndex = 0;
|
||||||
uint64_t mInputFrameIndex = 0;
|
uint64_t mInputFrameIndex = 0;
|
||||||
uint64_t mOutputFrameScheduleIndex = 0;
|
uint64_t mOutputFrameScheduleIndex = 0;
|
||||||
@@ -121,6 +139,11 @@ private:
|
|||||||
double mOutputRenderMilliseconds = 0.0;
|
double mOutputRenderMilliseconds = 0.0;
|
||||||
double mSmoothedOutputRenderMilliseconds = 0.0;
|
double mSmoothedOutputRenderMilliseconds = 0.0;
|
||||||
double mMaxOutputRenderMilliseconds = 0.0;
|
double mMaxOutputRenderMilliseconds = 0.0;
|
||||||
|
double mOutputFrameAcquireMilliseconds = 0.0;
|
||||||
|
double mOutputFrameRenderRequestMilliseconds = 0.0;
|
||||||
|
double mOutputFrameEndAccessMilliseconds = 0.0;
|
||||||
|
uint64_t mLastLateStreak = 0;
|
||||||
|
uint64_t mLastDropStreak = 0;
|
||||||
uint64_t mLateFrameCount = 0;
|
uint64_t mLateFrameCount = 0;
|
||||||
uint64_t mDroppedFrameCount = 0;
|
uint64_t mDroppedFrameCount = 0;
|
||||||
uint64_t mFlushedFrameCount = 0;
|
uint64_t mFlushedFrameCount = 0;
|
||||||
|
|||||||
@@ -30,5 +30,8 @@ inline VideoPlayoutPolicy NormalizeVideoPlayoutPolicy(VideoPlayoutPolicy policy)
|
|||||||
policy.targetReadyFrames = 1;
|
policy.targetReadyFrames = 1;
|
||||||
if (policy.maxReadyFrames < policy.targetReadyFrames)
|
if (policy.maxReadyFrames < policy.targetReadyFrames)
|
||||||
policy.maxReadyFrames = policy.targetReadyFrames;
|
policy.maxReadyFrames = policy.targetReadyFrames;
|
||||||
|
const unsigned minimumOutputFramePoolSize = policy.targetPrerollFrames + policy.maxReadyFrames + policy.minimumSpareDeviceFrames;
|
||||||
|
if (policy.outputFramePoolSize < minimumOutputFramePoolSize)
|
||||||
|
policy.outputFramePoolSize = minimumOutputFramePoolSize;
|
||||||
return policy;
|
return policy;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -423,7 +423,6 @@ bool DeckLinkSession::AcquireNextOutputVideoFrame(CComPtr<IDeckLinkMutableVideoF
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
outputVideoFrame = outputVideoFrameQueue.front();
|
outputVideoFrame = outputVideoFrameQueue.front();
|
||||||
outputVideoFrameQueue.push_back(outputVideoFrame);
|
|
||||||
outputVideoFrameQueue.pop_front();
|
outputVideoFrameQueue.pop_front();
|
||||||
return outputVideoFrame != nullptr;
|
return outputVideoFrame != nullptr;
|
||||||
}
|
}
|
||||||
@@ -448,6 +447,7 @@ bool DeckLinkSession::PopulateOutputFrame(IDeckLinkMutableVideoFrame* outputVide
|
|||||||
frame.width = mState.outputFrameSize.width;
|
frame.width = mState.outputFrameSize.width;
|
||||||
frame.height = mState.outputFrameSize.height;
|
frame.height = mState.outputFrameSize.height;
|
||||||
frame.pixelFormat = mState.outputPixelFormat;
|
frame.pixelFormat = mState.outputPixelFormat;
|
||||||
|
outputVideoFrame->AddRef();
|
||||||
frame.nativeFrame = outputVideoFrame;
|
frame.nativeFrame = outputVideoFrame;
|
||||||
frame.nativeBuffer = outputVideoFrameBuffer.Detach();
|
frame.nativeBuffer = outputVideoFrameBuffer.Detach();
|
||||||
return true;
|
return true;
|
||||||
@@ -506,7 +506,10 @@ VideoPlayoutRecoveryDecision DeckLinkSession::AccountForCompletionResult(VideoIO
|
|||||||
bool DeckLinkSession::ScheduleOutputFrame(const VideoIOOutputFrame& frame)
|
bool DeckLinkSession::ScheduleOutputFrame(const VideoIOOutputFrame& frame)
|
||||||
{
|
{
|
||||||
IDeckLinkMutableVideoFrame* outputVideoFrame = static_cast<IDeckLinkMutableVideoFrame*>(frame.nativeFrame);
|
IDeckLinkMutableVideoFrame* outputVideoFrame = static_cast<IDeckLinkMutableVideoFrame*>(frame.nativeFrame);
|
||||||
return ScheduleFrame(outputVideoFrame);
|
const bool scheduled = ScheduleFrame(outputVideoFrame);
|
||||||
|
if (outputVideoFrame != nullptr)
|
||||||
|
outputVideoFrame->Release();
|
||||||
|
return scheduled;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DeckLinkSession::Start()
|
bool DeckLinkSession::Start()
|
||||||
@@ -616,8 +619,18 @@ void DeckLinkSession::HandleVideoInputFrame(IDeckLinkVideoInputFrame* inputFrame
|
|||||||
inputFrameBuffer->EndAccess(bmdBufferAccessRead);
|
inputFrameBuffer->EndAccess(bmdBufferAccessRead);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DeckLinkSession::HandlePlayoutFrameCompleted(IDeckLinkVideoFrame*, BMDOutputFrameCompletionResult completionResult)
|
void DeckLinkSession::HandlePlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, BMDOutputFrameCompletionResult completionResult)
|
||||||
{
|
{
|
||||||
|
if (completedFrame != nullptr)
|
||||||
|
{
|
||||||
|
CComPtr<IDeckLinkMutableVideoFrame> reusableFrame;
|
||||||
|
if (completedFrame->QueryInterface(IID_IDeckLinkMutableVideoFrame, reinterpret_cast<void**>(&reusableFrame)) == S_OK &&
|
||||||
|
reusableFrame != nullptr)
|
||||||
|
{
|
||||||
|
outputVideoFrameQueue.push_back(reusableFrame);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!mOutputFrameCallback)
|
if (!mOutputFrameCallback)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ Phase 7 made backend lifecycle, playout policy, ready-frame queueing, late/drop
|
|||||||
## Status
|
## Status
|
||||||
|
|
||||||
- Phase 7.5 design package: proposed.
|
- Phase 7.5 design package: proposed.
|
||||||
- Phase 7.5 implementation: Step 2 in progress.
|
- Phase 7.5 implementation: Step 5 in progress.
|
||||||
- Current alignment: Phase 7 is complete. `RenderOutputQueue`, `VideoPlayoutPolicy`, `VideoPlayoutScheduler`, `VideoBackendLifecycle`, and backend playout telemetry exist. The backend worker fills the ready queue on completion demand, but render production is not yet proactively driven by queue pressure or video cadence.
|
- Current alignment: Phase 7 is complete. `RenderOutputQueue`, `VideoPlayoutPolicy`, `VideoPlayoutScheduler`, `VideoBackendLifecycle`, and backend playout telemetry exist. The backend worker fills the ready queue on completion demand, but render production is not yet proactively driven by queue pressure or video cadence.
|
||||||
|
|
||||||
Current footholds:
|
Current footholds:
|
||||||
@@ -19,6 +19,9 @@ Current footholds:
|
|||||||
- `HealthTelemetry::BackendPlayoutSnapshot` exposes queue depth, underruns, late/drop streaks, and recovery decisions.
|
- `HealthTelemetry::BackendPlayoutSnapshot` exposes queue depth, underruns, late/drop streaks, and recovery decisions.
|
||||||
- Step 1 adds baseline timing fields for ready-queue min/max/zero-depth samples and output render duration.
|
- Step 1 adds baseline timing fields for ready-queue min/max/zero-depth samples and output render duration.
|
||||||
- Step 2 adds a pure `OutputProductionController` for queue-pressure production decisions.
|
- Step 2 adds a pure `OutputProductionController` for queue-pressure production decisions.
|
||||||
|
- Step 3 adds a proactive output producer worker that keeps `RenderOutputQueue` warm after playback starts.
|
||||||
|
- Step 4 skips non-forced preview presentation while output ready-queue depth is below target.
|
||||||
|
- Step 5 makes async readback misses prefer cached output over synchronous readback after bootstrap.
|
||||||
|
|
||||||
## Timing Review Findings
|
## Timing Review Findings
|
||||||
|
|
||||||
@@ -199,15 +202,23 @@ Move from demand-filled output production to queue-pressure production.
|
|||||||
|
|
||||||
Initial target:
|
Initial target:
|
||||||
|
|
||||||
- producer wakes when queue depth is below target
|
- [x] producer wakes when queue depth is below target
|
||||||
- producer requests render-thread output production until target depth is reached
|
- [x] producer requests render-thread output production until target depth is reached
|
||||||
- producer stops when backend stops or render thread shuts down
|
- [x] producer stops when backend stops or render thread shuts down
|
||||||
- completion worker mostly schedules from already-ready frames
|
- [x] completion worker mostly schedules from already-ready frames
|
||||||
|
|
||||||
Exit criteria:
|
Exit criteria:
|
||||||
|
|
||||||
- normal playback does not depend on completion processing to fill the queue from empty
|
- [x] normal playback does not depend on completion processing to fill the queue from empty
|
||||||
- callback/completion pressure and render production pressure are separate
|
- [x] callback/completion pressure and render production pressure are separate
|
||||||
|
|
||||||
|
Implementation notes:
|
||||||
|
|
||||||
|
- `VideoBackend` starts the completion worker before device start, then starts the output producer only after DeckLink start succeeds. This avoids fighting DeckLink preroll for the same output frame pool.
|
||||||
|
- `OutputProducerWorkerMain()` periodically wakes and uses `OutputProductionController` to decide whether to produce, wait, or throttle.
|
||||||
|
- Completion handling records pacing/recovery, updates producer pressure, schedules a ready frame, and wakes the producer to refill headroom.
|
||||||
|
- Completion handling keeps a one-frame synchronous fallback when the ready queue is unexpectedly empty, then falls back to black underrun behavior if that also fails.
|
||||||
|
- Producer shutdown is explicit and joined before video output teardown.
|
||||||
|
|
||||||
### Step 4. Prioritize Playout Over Preview
|
### Step 4. Prioritize Playout Over Preview
|
||||||
|
|
||||||
@@ -215,15 +226,21 @@ Make preview explicitly subordinate to output playout deadlines.
|
|||||||
|
|
||||||
Initial target:
|
Initial target:
|
||||||
|
|
||||||
- skip or delay preview when ready queue depth is below target
|
- [x] skip or delay preview when ready queue depth is below target
|
||||||
- count skipped previews
|
- count skipped previews
|
||||||
- record preview present cost separately from output render cost
|
- record preview present cost separately from output render cost
|
||||||
|
|
||||||
Exit criteria:
|
Exit criteria:
|
||||||
|
|
||||||
- preview cannot drain output headroom invisibly
|
- [x] preview cannot drain output headroom invisibly
|
||||||
- runtime telemetry shows preview skips and preview present cost
|
- runtime telemetry shows preview skips and preview present cost
|
||||||
|
|
||||||
|
Implementation notes:
|
||||||
|
|
||||||
|
- `OpenGLComposite::paintGL(false)` now skips preview presentation when `VideoBackend` reports that the ready queue is below the target depth.
|
||||||
|
- Forced preview paints are still allowed so resize/manual paint behavior remains intact.
|
||||||
|
- Preview skip counters and present-cost telemetry remain follow-up work for this step.
|
||||||
|
|
||||||
### Step 5. Make Readback Miss Policy Deadline-Aware
|
### Step 5. Make Readback Miss Policy Deadline-Aware
|
||||||
|
|
||||||
Avoid turning a late async readback fence into synchronous deadline pressure by default.
|
Avoid turning a late async readback fence into synchronous deadline pressure by default.
|
||||||
@@ -232,13 +249,20 @@ Initial target:
|
|||||||
|
|
||||||
- count async readback misses
|
- count async readback misses
|
||||||
- count synchronous fallback uses
|
- count synchronous fallback uses
|
||||||
- allow policy to prefer stale/black output over synchronous fallback when queue pressure is high
|
- [x] allow policy to prefer stale/black output over synchronous fallback when queue pressure is high
|
||||||
- keep current fallback available while behavior is measured
|
- [x] keep current fallback available while behavior is measured
|
||||||
|
|
||||||
Exit criteria:
|
Exit criteria:
|
||||||
|
|
||||||
- readback fallback is an explicit policy decision
|
- [x] readback fallback is an explicit policy decision
|
||||||
- late GPU fences do not automatically block the most timing-sensitive path
|
- [x] late GPU fences do not automatically block the most timing-sensitive path
|
||||||
|
|
||||||
|
Implementation notes:
|
||||||
|
|
||||||
|
- `OpenGLRenderPipeline::ReadOutputFrame()` now uses synchronous readback only to bootstrap the first cached output frame.
|
||||||
|
- After cached output exists, an async readback miss copies the cached output frame into the DeckLink output frame instead of blocking on synchronous `glReadPixels`.
|
||||||
|
- Async readback queueing now skips when the next PBO slot is still in flight rather than deleting an in-flight fence and overwriting it.
|
||||||
|
- Miss/fallback counters remain follow-up telemetry work for this step.
|
||||||
|
|
||||||
### Step 6. Tune Headroom Policy
|
### Step 6. Tune Headroom Policy
|
||||||
|
|
||||||
|
|||||||
165
docs/PHASE_7_5_READBACK_EXPERIMENT_LOG.md
Normal file
165
docs/PHASE_7_5_READBACK_EXPERIMENT_LOG.md
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
# Phase 7.5 Readback Experiment Log
|
||||||
|
|
||||||
|
This log tracks short readback experiments during the proactive playout timing work.
|
||||||
|
|
||||||
|
## How To Run
|
||||||
|
|
||||||
|
The default debugger launch keeps the current production path:
|
||||||
|
|
||||||
|
- `Debug LoopThroughWithOpenGLCompositing`
|
||||||
|
- `VST_OUTPUT_READBACK_MODE` unset
|
||||||
|
- mode: `async_pbo`
|
||||||
|
|
||||||
|
Comparison modes are still available:
|
||||||
|
|
||||||
|
- `VST_OUTPUT_READBACK_MODE=async_pbo`
|
||||||
|
- uses the older PBO/fence readback path
|
||||||
|
|
||||||
|
The experiment launches are:
|
||||||
|
|
||||||
|
- `Debug LoopThroughWithOpenGLCompositing - sync readback experiment`
|
||||||
|
- `VST_OUTPUT_READBACK_MODE=sync`
|
||||||
|
- uses direct synchronous `glReadPixels()` every output frame
|
||||||
|
|
||||||
|
- `Debug LoopThroughWithOpenGLCompositing - cached output experiment`
|
||||||
|
- `VST_OUTPUT_READBACK_MODE=cached_only`
|
||||||
|
- uses one bootstrap synchronous readback, then copies the cached output frame without ongoing GPU readback
|
||||||
|
|
||||||
|
The cached-output experiment is not visually correct for live motion. It exists to test whether removing ongoing GPU readback lets the producer fill the ready queue again.
|
||||||
|
|
||||||
|
## Experiment 3: fast_transfer
|
||||||
|
|
||||||
|
Status: removed from active code after hardware sample
|
||||||
|
|
||||||
|
Date: 2026-05-11
|
||||||
|
|
||||||
|
Change:
|
||||||
|
|
||||||
|
- DeckLink output frames are now created with `CreateVideoFrameWithBuffer()`.
|
||||||
|
- Output frame buffers are owned by `PinnedMemoryAllocator`.
|
||||||
|
- `VideoIOOutputFrame` carries a texture-transfer callback.
|
||||||
|
- The test branch changed the default render readback path to try `VideoFrameTransfer::GPUtoCPU` against the output texture for BGRA output.
|
||||||
|
- If fast transfer is unavailable or fails, the code falls back to cached output if present, then synchronous readback as a safety fallback.
|
||||||
|
|
||||||
|
Question:
|
||||||
|
|
||||||
|
Can SDK-style pinned/DVP transfer recover real rendered output timing without the visually-invalid cached-only shortcut?
|
||||||
|
|
||||||
|
Result:
|
||||||
|
|
||||||
|
- The test machine reported `GL_VENDOR=NVIDIA Corporation` and `GL_RENDERER=NVIDIA GeForce RTX 4060 Ti/PCIe/SSE2`.
|
||||||
|
- The DeckLink SDK OpenGL fast-transfer sample gates NVIDIA DVP on `GL_RENDERER` containing `Quadro`.
|
||||||
|
- `GL_AMD_pinned_memory` was also unavailable.
|
||||||
|
- The fast-transfer path was removed from active code to avoid carrying unsupported DVP dependencies while we investigate CPU-frame buffering and render-ahead.
|
||||||
|
|
||||||
|
## Baseline: async_pbo
|
||||||
|
|
||||||
|
Date: 2026-05-11
|
||||||
|
|
||||||
|
Observed while the app was running after adding the async queue split counters.
|
||||||
|
|
||||||
|
Summary:
|
||||||
|
|
||||||
|
- ready queue was pinned at 0 or briefly 1
|
||||||
|
- underrun, zero-depth, late, and dropped counts increased continuously
|
||||||
|
- `renderRequestMs` usually sat around 16-25 ms, with occasional larger spikes
|
||||||
|
- `asyncQueueMs` was mostly explained by `asyncQueueReadPixelsMs`
|
||||||
|
- PBO allocation/orphaning was effectively 0 ms
|
||||||
|
|
||||||
|
Representative samples:
|
||||||
|
|
||||||
|
| readyDepth | renderRequestMs | queueWaitMs | drawMs | mapMs | copyMs | asyncQueueMs | asyncQueueBufferMs | asyncQueueReadPixelsMs |
|
||||||
|
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
||||||
|
| 0 | 24.915 | 3.018 | 0.510 | 0.923 | 0.768 | 9.018 | 0.000 | 9.001 |
|
||||||
|
| 0 | 16.226 | 3.066 | 0.518 | 1.202 | 0.812 | 8.611 | 0.000 | 8.598 |
|
||||||
|
| 0 | 12.134 | 3.796 | 3.579 | 1.378 | 0.690 | 10.323 | 0.000 | 10.311 |
|
||||||
|
| 0 | 17.496 | 2.817 | 0.523 | 1.267 | 1.160 | 9.416 | 0.000 | 9.403 |
|
||||||
|
|
||||||
|
Initial read:
|
||||||
|
|
||||||
|
The main repeated cost is issuing `glReadPixels(..., nullptr)` into the PBO. `glBufferData`, setup, fence creation, fence wait, map, and CPU copy are not large enough to explain the underruns.
|
||||||
|
|
||||||
|
## Experiment 1: sync
|
||||||
|
|
||||||
|
Status: sampled
|
||||||
|
|
||||||
|
Question:
|
||||||
|
|
||||||
|
Does the direct synchronous readback path perform better or worse than the current PBO path on this machine and DeckLink format?
|
||||||
|
|
||||||
|
Expected interpretation:
|
||||||
|
|
||||||
|
- If `syncReadMs` is lower than `asyncQueueReadPixelsMs` and the ready queue improves, the current PBO path is the wrong strategy for this driver/format.
|
||||||
|
- If `syncReadMs` is also high and the ready queue remains empty, any GPU-to-CPU readback in this path is too expensive for the current producer cadence.
|
||||||
|
|
||||||
|
Results:
|
||||||
|
|
||||||
|
Date: 2026-05-11
|
||||||
|
|
||||||
|
Summary:
|
||||||
|
|
||||||
|
- ready queue remained pinned at 0
|
||||||
|
- underrun, zero-depth, late, and dropped counts continued increasing
|
||||||
|
- `asyncQueueMs` and async readback counters were 0, confirming the experiment mode was active
|
||||||
|
- direct `syncReadMs` was generally worse than the baseline PBO `asyncQueueReadPixelsMs`
|
||||||
|
|
||||||
|
Representative samples:
|
||||||
|
|
||||||
|
| readyDepth | renderRequestMs | queueWaitMs | drawMs | syncReadMs | asyncQueueMs | syncFallbackCount |
|
||||||
|
| --- | ---: | ---: | ---: | ---: | ---: | ---: |
|
||||||
|
| 0 | 32.467 | 5.764 | 1.389 | 23.122 | 0.000 | 680 |
|
||||||
|
| 0 | 29.722 | 2.603 | 0.512 | 25.538 | 0.000 | 697 |
|
||||||
|
| 0 | 37.844 | 7.716 | 0.518 | 23.608 | 0.000 | 706 |
|
||||||
|
| 0 | 22.304 | 3.089 | 1.843 | 15.278 | 0.000 | 723 |
|
||||||
|
| 0 | 27.196 | 4.015 | 0.500 | 21.933 | 0.000 | 736 |
|
||||||
|
|
||||||
|
Read:
|
||||||
|
|
||||||
|
Direct synchronous readback does not recover the queue and is slower than the async PBO path on the sampled run. The bottleneck appears to be GPU-to-CPU readback itself, not PBO orphaning or fence handling.
|
||||||
|
|
||||||
|
## Experiment 2: cached_only
|
||||||
|
|
||||||
|
Status: sampled
|
||||||
|
|
||||||
|
Question:
|
||||||
|
|
||||||
|
If ongoing GPU readback is removed after bootstrap, can the producer keep the ready queue above 0?
|
||||||
|
|
||||||
|
Expected interpretation:
|
||||||
|
|
||||||
|
- If ready depth rises and underruns slow or stop, readback is the primary bottleneck.
|
||||||
|
- If ready depth still stays near 0, the bottleneck is elsewhere in scheduling, frame acquisition, queueing, or DeckLink handoff.
|
||||||
|
|
||||||
|
Results:
|
||||||
|
|
||||||
|
Date: 2026-05-11
|
||||||
|
|
||||||
|
User-visible result:
|
||||||
|
|
||||||
|
- DeckLink reported a healthy 5-frame buffer.
|
||||||
|
|
||||||
|
Telemetry summary:
|
||||||
|
|
||||||
|
- `renderRequestMs` dropped to roughly 1-3 ms.
|
||||||
|
- `cachedCopyMs` was usually around 0.8-1.0 ms, with one sampled low value around 0.37 ms.
|
||||||
|
- `asyncQueueMs`, `asyncQueueReadPixelsMs`, `syncReadMs`, fence wait, map, and async copy were 0 after bootstrap.
|
||||||
|
- `syncFallbackCount` stayed at 1, confirming one bootstrap readback.
|
||||||
|
- `cachedFallbackCount` increased continuously, confirming ongoing frames were served from cached CPU memory.
|
||||||
|
- late and dropped counts were 0 during the sampled run.
|
||||||
|
- internal ready queue depth still reported mostly 0-1 even while DeckLink showed a healthy hardware/device buffer.
|
||||||
|
|
||||||
|
Representative samples:
|
||||||
|
|
||||||
|
| readyDepth | renderRequestMs | queueWaitMs | drawMs | cachedCopyMs | asyncQueueMs | syncReadMs | late | dropped |
|
||||||
|
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
||||||
|
| 0 | 1.446 | 0.018 | 0.518 | 0.864 | 0.000 | 0.000 | 0 | 0 |
|
||||||
|
| 0 | 2.586 | 1.089 | 0.514 | 0.829 | 0.000 | 0.000 | 0 | 0 |
|
||||||
|
| 0 | 1.481 | 2.378 | 0.502 | 0.911 | 0.000 | 0.000 | 0 | 0 |
|
||||||
|
| 0 | 0.892 | 0.013 | 0.468 | 0.371 | 0.000 | 0.000 | 0 | 0 |
|
||||||
|
| 1 | 1.398 | 0.019 | 0.483 | 0.819 | 0.000 | 0.000 | 0 | 0 |
|
||||||
|
|
||||||
|
Read:
|
||||||
|
|
||||||
|
Removing ongoing GPU readback recovers output timing immediately. The direct cause of the Phase 7.5 playback collapse is the per-frame GPU-to-CPU readback cost, not DeckLink frame acquisition, output frame end-access, PBO allocation, fence waiting, or CPU copy.
|
||||||
|
|
||||||
|
The internal ready queue depth still being low while DeckLink reports a healthy device buffer suggests the ready queue is acting as a short staging queue rather than the full device playout buffer. For the next fix, prioritize avoiding a blocking readback on every output frame instead of only increasing internal ready queue depth.
|
||||||
@@ -363,6 +363,10 @@ components:
|
|||||||
$ref: "#/components/schemas/VideoIOStatus"
|
$ref: "#/components/schemas/VideoIOStatus"
|
||||||
performance:
|
performance:
|
||||||
$ref: "#/components/schemas/PerformanceStatus"
|
$ref: "#/components/schemas/PerformanceStatus"
|
||||||
|
backendPlayout:
|
||||||
|
$ref: "#/components/schemas/BackendPlayoutStatus"
|
||||||
|
runtimeEvents:
|
||||||
|
$ref: "#/components/schemas/RuntimeEventStatus"
|
||||||
shaders:
|
shaders:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
@@ -382,10 +386,16 @@ components:
|
|||||||
type: number
|
type: number
|
||||||
oscPort:
|
oscPort:
|
||||||
type: number
|
type: number
|
||||||
|
oscBindAddress:
|
||||||
|
type: string
|
||||||
|
oscSmoothing:
|
||||||
|
type: number
|
||||||
autoReload:
|
autoReload:
|
||||||
type: boolean
|
type: boolean
|
||||||
maxTemporalHistoryFrames:
|
maxTemporalHistoryFrames:
|
||||||
type: number
|
type: number
|
||||||
|
previewFps:
|
||||||
|
type: number
|
||||||
enableExternalKeying:
|
enableExternalKeying:
|
||||||
type: boolean
|
type: boolean
|
||||||
inputVideoFormat:
|
inputVideoFormat:
|
||||||
@@ -478,6 +488,175 @@ components:
|
|||||||
type: number
|
type: number
|
||||||
flushedFrameCount:
|
flushedFrameCount:
|
||||||
type: number
|
type: number
|
||||||
|
BackendPlayoutStatus:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
lifecycleState:
|
||||||
|
type: string
|
||||||
|
example: running
|
||||||
|
degraded:
|
||||||
|
type: boolean
|
||||||
|
statusMessage:
|
||||||
|
type: string
|
||||||
|
lateFrameCount:
|
||||||
|
type: number
|
||||||
|
droppedFrameCount:
|
||||||
|
type: number
|
||||||
|
flushedFrameCount:
|
||||||
|
type: number
|
||||||
|
readyQueue:
|
||||||
|
$ref: "#/components/schemas/BackendReadyQueueStatus"
|
||||||
|
outputRender:
|
||||||
|
$ref: "#/components/schemas/BackendOutputRenderStatus"
|
||||||
|
recovery:
|
||||||
|
$ref: "#/components/schemas/BackendPlayoutRecoveryStatus"
|
||||||
|
BackendReadyQueueStatus:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
depth:
|
||||||
|
type: number
|
||||||
|
description: Current number of ready output frames.
|
||||||
|
capacity:
|
||||||
|
type: number
|
||||||
|
description: Maximum ready output frames currently allowed.
|
||||||
|
minDepth:
|
||||||
|
type: number
|
||||||
|
description: Minimum observed ready queue depth since backend worker start.
|
||||||
|
maxDepth:
|
||||||
|
type: number
|
||||||
|
description: Maximum observed ready queue depth since backend worker start.
|
||||||
|
zeroDepthCount:
|
||||||
|
type: number
|
||||||
|
description: Number of observed samples where the ready queue was empty.
|
||||||
|
pushedCount:
|
||||||
|
type: number
|
||||||
|
poppedCount:
|
||||||
|
type: number
|
||||||
|
droppedCount:
|
||||||
|
type: number
|
||||||
|
underrunCount:
|
||||||
|
type: number
|
||||||
|
BackendOutputRenderStatus:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
renderMs:
|
||||||
|
type: number
|
||||||
|
description: Most recent output render duration in milliseconds.
|
||||||
|
smoothedRenderMs:
|
||||||
|
type: number
|
||||||
|
description: Smoothed output render duration in milliseconds.
|
||||||
|
maxRenderMs:
|
||||||
|
type: number
|
||||||
|
description: Maximum observed output render duration in milliseconds.
|
||||||
|
acquireFrameMs:
|
||||||
|
type: number
|
||||||
|
description: Time spent acquiring a writable backend output frame in milliseconds.
|
||||||
|
renderRequestMs:
|
||||||
|
type: number
|
||||||
|
description: Time spent executing the render-thread output frame request in milliseconds.
|
||||||
|
endAccessMs:
|
||||||
|
type: number
|
||||||
|
description: Time spent ending write access to the backend output frame in milliseconds.
|
||||||
|
queueWaitMs:
|
||||||
|
type: number
|
||||||
|
description: Time the output render request spent waiting for the render thread in milliseconds.
|
||||||
|
drawMs:
|
||||||
|
type: number
|
||||||
|
description: Time spent drawing, blitting, packing, and flushing the output frame in milliseconds.
|
||||||
|
fenceWaitMs:
|
||||||
|
type: number
|
||||||
|
description: Time spent waiting for the async readback fence in milliseconds.
|
||||||
|
mapMs:
|
||||||
|
type: number
|
||||||
|
description: Time spent mapping the async readback pixel buffer in milliseconds.
|
||||||
|
readbackCopyMs:
|
||||||
|
type: number
|
||||||
|
description: Time spent copying async readback bytes into the backend output frame in milliseconds.
|
||||||
|
cachedCopyMs:
|
||||||
|
type: number
|
||||||
|
description: Time spent, in milliseconds, copying the cached output frame when async readback is not ready.
|
||||||
|
asyncQueueMs:
|
||||||
|
type: number
|
||||||
|
description: Time spent queueing the next async readback in milliseconds.
|
||||||
|
asyncQueueBufferMs:
|
||||||
|
type: number
|
||||||
|
description: Time spent orphaning or allocating the async readback pixel buffer in milliseconds.
|
||||||
|
asyncQueueSetupMs:
|
||||||
|
type: number
|
||||||
|
description: Time spent applying readback pixel-store, framebuffer, and pixel-pack-buffer state in milliseconds.
|
||||||
|
asyncQueueReadPixelsMs:
|
||||||
|
type: number
|
||||||
|
description: Time spent issuing glReadPixels for the async readback in milliseconds.
|
||||||
|
asyncQueueFenceMs:
|
||||||
|
type: number
|
||||||
|
description: Time spent creating the async readback fence in milliseconds.
|
||||||
|
syncReadMs:
|
||||||
|
type: number
|
||||||
|
description: Time spent in bootstrap synchronous readback in milliseconds.
|
||||||
|
asyncReadbackMissCount:
|
||||||
|
type: number
|
||||||
|
description: Count of output render requests where async readback was not ready.
|
||||||
|
cachedFallbackCount:
|
||||||
|
type: number
|
||||||
|
description: Count of output render requests served from the cached output frame.
|
||||||
|
syncFallbackCount:
|
||||||
|
type: number
|
||||||
|
description: Count of output render requests that used bootstrap synchronous readback.
|
||||||
|
BackendPlayoutRecoveryStatus:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
completionResult:
|
||||||
|
type: string
|
||||||
|
enum: [Completed, DisplayedLate, Dropped, Flushed, Unknown]
|
||||||
|
completedFrameIndex:
|
||||||
|
type: number
|
||||||
|
scheduledFrameIndex:
|
||||||
|
type: number
|
||||||
|
scheduledLeadFrames:
|
||||||
|
type: number
|
||||||
|
measuredLagFrames:
|
||||||
|
type: number
|
||||||
|
catchUpFrames:
|
||||||
|
type: number
|
||||||
|
lateStreak:
|
||||||
|
type: number
|
||||||
|
dropStreak:
|
||||||
|
type: number
|
||||||
|
RuntimeEventStatus:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
queue:
|
||||||
|
$ref: "#/components/schemas/RuntimeEventQueueStatus"
|
||||||
|
dispatch:
|
||||||
|
$ref: "#/components/schemas/RuntimeEventDispatchStatus"
|
||||||
|
RuntimeEventQueueStatus:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
depth:
|
||||||
|
type: number
|
||||||
|
capacity:
|
||||||
|
type: number
|
||||||
|
droppedCount:
|
||||||
|
type: number
|
||||||
|
oldestEventAgeMs:
|
||||||
|
type: number
|
||||||
|
RuntimeEventDispatchStatus:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
dispatchCallCount:
|
||||||
|
type: number
|
||||||
|
dispatchedEventCount:
|
||||||
|
type: number
|
||||||
|
handlerInvocationCount:
|
||||||
|
type: number
|
||||||
|
handlerFailureCount:
|
||||||
|
type: number
|
||||||
|
lastDispatchDurationMs:
|
||||||
|
type: number
|
||||||
|
maxDispatchDurationMs:
|
||||||
|
type: number
|
||||||
ShaderSummary:
|
ShaderSummary:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
@@ -497,6 +676,8 @@ components:
|
|||||||
description: Error text for unavailable shader packages.
|
description: Error text for unavailable shader packages.
|
||||||
temporal:
|
temporal:
|
||||||
$ref: "#/components/schemas/TemporalState"
|
$ref: "#/components/schemas/TemporalState"
|
||||||
|
feedback:
|
||||||
|
$ref: "#/components/schemas/FeedbackState"
|
||||||
TemporalState:
|
TemporalState:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
@@ -509,6 +690,13 @@ components:
|
|||||||
type: number
|
type: number
|
||||||
effectiveHistoryLength:
|
effectiveHistoryLength:
|
||||||
type: number
|
type: number
|
||||||
|
FeedbackState:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
enabled:
|
||||||
|
type: boolean
|
||||||
|
writePass:
|
||||||
|
type: string
|
||||||
LayerState:
|
LayerState:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|||||||
@@ -95,6 +95,9 @@ void TestBackendPlayoutHealth()
|
|||||||
8.5,
|
8.5,
|
||||||
7.25,
|
7.25,
|
||||||
12.0,
|
12.0,
|
||||||
|
1.0,
|
||||||
|
6.5,
|
||||||
|
0.5,
|
||||||
8,
|
8,
|
||||||
11,
|
11,
|
||||||
3,
|
3,
|
||||||
@@ -121,6 +124,9 @@ void TestBackendPlayoutHealth()
|
|||||||
Expect(playout.outputRenderMilliseconds == 8.5, "backend playout health stores output render duration");
|
Expect(playout.outputRenderMilliseconds == 8.5, "backend playout health stores output render duration");
|
||||||
Expect(playout.smoothedOutputRenderMilliseconds == 7.25, "backend playout health stores smoothed output render duration");
|
Expect(playout.smoothedOutputRenderMilliseconds == 7.25, "backend playout health stores smoothed output render duration");
|
||||||
Expect(playout.maxOutputRenderMilliseconds == 12.0, "backend playout health stores max output render duration");
|
Expect(playout.maxOutputRenderMilliseconds == 12.0, "backend playout health stores max output render duration");
|
||||||
|
Expect(playout.outputFrameAcquireMilliseconds == 1.0, "backend playout health stores output frame acquire duration");
|
||||||
|
Expect(playout.outputFrameRenderRequestMilliseconds == 6.5, "backend playout health stores output render request duration");
|
||||||
|
Expect(playout.outputFrameEndAccessMilliseconds == 0.5, "backend playout health stores output frame end access duration");
|
||||||
Expect(playout.completedFrameIndex == 8, "backend playout health stores completed index");
|
Expect(playout.completedFrameIndex == 8, "backend playout health stores completed index");
|
||||||
Expect(playout.scheduledFrameIndex == 11, "backend playout health stores scheduled index");
|
Expect(playout.scheduledFrameIndex == 11, "backend playout health stores scheduled index");
|
||||||
Expect(playout.measuredLagFrames == 2, "backend playout health stores measured lag");
|
Expect(playout.measuredLagFrames == 2, "backend playout health stores measured lag");
|
||||||
@@ -148,6 +154,9 @@ void TestBackendPlayoutHealth()
|
|||||||
-5.0,
|
-5.0,
|
||||||
-4.0,
|
-4.0,
|
||||||
-3.0,
|
-3.0,
|
||||||
|
-2.0,
|
||||||
|
-1.0,
|
||||||
|
-0.5,
|
||||||
9,
|
9,
|
||||||
12,
|
12,
|
||||||
3,
|
3,
|
||||||
@@ -165,6 +174,32 @@ void TestBackendPlayoutHealth()
|
|||||||
Expect(snapshot.backendPlayout.lifecycleState == "Running", "full health snapshot includes backend playout state");
|
Expect(snapshot.backendPlayout.lifecycleState == "Running", "full health snapshot includes backend playout state");
|
||||||
Expect(!snapshot.backendPlayout.degraded, "full health snapshot includes backend degraded state");
|
Expect(!snapshot.backendPlayout.degraded, "full health snapshot includes backend degraded state");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TestOutputRenderPipelineTiming()
|
||||||
|
{
|
||||||
|
HealthTelemetry telemetry;
|
||||||
|
telemetry.RecordOutputRenderQueueWait(2.5);
|
||||||
|
telemetry.RecordOutputRenderPipelineTiming(1.0, 0.5, 0.25, 0.75, 0.125, 0.375, 0.1, 0.2, 0.3, 0.4, 3.5, true, true, false);
|
||||||
|
Expect(telemetry.TryRecordOutputRenderPipelineTiming(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, false, false, true),
|
||||||
|
"try output render timing succeeds when uncontended");
|
||||||
|
|
||||||
|
const HealthTelemetry::BackendPlayoutSnapshot playout = telemetry.GetBackendPlayoutSnapshot();
|
||||||
|
Expect(playout.outputRenderQueueWaitMilliseconds == 2.5, "output render timing stores queue wait");
|
||||||
|
Expect(playout.outputRenderDrawMilliseconds == 0.0, "output render timing clamps draw duration");
|
||||||
|
Expect(playout.outputReadbackFenceWaitMilliseconds == 0.0, "output render timing clamps fence wait duration");
|
||||||
|
Expect(playout.outputReadbackMapMilliseconds == 0.0, "output render timing clamps map duration");
|
||||||
|
Expect(playout.outputReadbackCopyMilliseconds == 0.0, "output render timing clamps readback copy duration");
|
||||||
|
Expect(playout.outputCachedCopyMilliseconds == 0.0, "output render timing clamps cached copy duration");
|
||||||
|
Expect(playout.outputAsyncQueueMilliseconds == 0.0, "output render timing clamps async queue duration");
|
||||||
|
Expect(playout.outputAsyncQueueBufferMilliseconds == 0.0, "output render timing clamps async queue buffer duration");
|
||||||
|
Expect(playout.outputAsyncQueueSetupMilliseconds == 0.0, "output render timing clamps async queue setup duration");
|
||||||
|
Expect(playout.outputAsyncQueueReadPixelsMilliseconds == 0.0, "output render timing clamps async queue read pixels duration");
|
||||||
|
Expect(playout.outputAsyncQueueFenceMilliseconds == 0.0, "output render timing clamps async queue fence duration");
|
||||||
|
Expect(playout.outputSyncReadMilliseconds == 0.0, "output render timing clamps sync read duration");
|
||||||
|
Expect(playout.outputAsyncReadbackMissCount == 1, "output render timing counts async readback misses");
|
||||||
|
Expect(playout.outputCachedFallbackCount == 1, "output render timing counts cached fallbacks");
|
||||||
|
Expect(playout.outputSyncFallbackCount == 1, "output render timing counts sync fallbacks");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
@@ -174,6 +209,7 @@ int main()
|
|||||||
TestRuntimeEventTryRecord();
|
TestRuntimeEventTryRecord();
|
||||||
TestPersistenceWriteHealth();
|
TestPersistenceWriteHealth();
|
||||||
TestBackendPlayoutHealth();
|
TestBackendPlayoutHealth();
|
||||||
|
TestOutputRenderPipelineTiming();
|
||||||
|
|
||||||
if (gFailures != 0)
|
if (gFailures != 0)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -93,9 +93,10 @@ void TestPolicyNormalization()
|
|||||||
policy.maxReadyFrames = 2;
|
policy.maxReadyFrames = 2;
|
||||||
|
|
||||||
VideoPlayoutPolicy normalized = NormalizeVideoPlayoutPolicy(policy);
|
VideoPlayoutPolicy normalized = NormalizeVideoPlayoutPolicy(policy);
|
||||||
Expect(normalized.outputFramePoolSize == 1, "policy normalization keeps at least one output frame");
|
|
||||||
Expect(normalized.targetPrerollFrames == 1, "policy normalization keeps at least one preroll frame");
|
Expect(normalized.targetPrerollFrames == 1, "policy normalization keeps at least one preroll frame");
|
||||||
Expect(normalized.maxReadyFrames == normalized.targetReadyFrames, "policy normalization keeps max ready frames above target");
|
Expect(normalized.maxReadyFrames == normalized.targetReadyFrames, "policy normalization keeps max ready frames above target");
|
||||||
|
Expect(normalized.outputFramePoolSize >= normalized.targetPrerollFrames + normalized.maxReadyFrames + normalized.minimumSpareDeviceFrames,
|
||||||
|
"policy normalization keeps enough output frames for preroll and ready queue ownership");
|
||||||
}
|
}
|
||||||
|
|
||||||
void TestFrameBudgets()
|
void TestFrameBudgets()
|
||||||
|
|||||||
Reference in New Issue
Block a user