Performance chasing
This commit is contained in:
44
.vscode/launch.json
vendored
44
.vscode/launch.json
vendored
@@ -17,6 +17,50 @@
|
||||
"moduleLoad": true
|
||||
},
|
||||
"preLaunchTask": "Build LoopThroughWithOpenGLCompositing Debug x64"
|
||||
},
|
||||
{
|
||||
"name": "Debug LoopThroughWithOpenGLCompositing - sync readback experiment",
|
||||
"type": "cppvsdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug\\LoopThroughWithOpenGLCompositing.exe",
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
|
||||
"environment": [
|
||||
{
|
||||
"name": "VST_OUTPUT_READBACK_MODE",
|
||||
"value": "sync"
|
||||
}
|
||||
],
|
||||
"console": "internalConsole",
|
||||
"symbolSearchPath": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
|
||||
"requireExactSource": true,
|
||||
"logging": {
|
||||
"moduleLoad": true
|
||||
},
|
||||
"preLaunchTask": "Build LoopThroughWithOpenGLCompositing Debug x64"
|
||||
},
|
||||
{
|
||||
"name": "Debug LoopThroughWithOpenGLCompositing - cached output experiment",
|
||||
"type": "cppvsdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug\\LoopThroughWithOpenGLCompositing.exe",
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
|
||||
"environment": [
|
||||
{
|
||||
"name": "VST_OUTPUT_READBACK_MODE",
|
||||
"value": "cached_only"
|
||||
}
|
||||
],
|
||||
"console": "internalConsole",
|
||||
"symbolSearchPath": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
|
||||
"requireExactSource": true,
|
||||
"logging": {
|
||||
"moduleLoad": true
|
||||
},
|
||||
"preLaunchTask": "Build LoopThroughWithOpenGLCompositing Debug x64"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ RenderEngine::RenderEngine(
|
||||
mRenderPass(mRenderer),
|
||||
mRenderPipeline(mRenderer, runtimeSnapshotProvider, healthTelemetry, std::move(renderEffect), std::move(screenshotReady), std::move(previewPaint)),
|
||||
mShaderPrograms(mRenderer, runtimeSnapshotProvider),
|
||||
mHealthTelemetry(healthTelemetry),
|
||||
mHdc(hdc),
|
||||
mHglrc(hglrc),
|
||||
mFrameStateResolver(runtimeSnapshotProvider)
|
||||
@@ -546,7 +547,11 @@ bool RenderEngine::RequestOutputFrame(const RenderPipelineFrameContext& context,
|
||||
{
|
||||
if (mRenderThreadRunning)
|
||||
{
|
||||
return TryInvokeOnRenderThread("output-render", [this, &context, &outputFrame]() {
|
||||
const auto queuedAt = std::chrono::steady_clock::now();
|
||||
return TryInvokeOnRenderThread("output-render", [this, &context, &outputFrame, queuedAt]() {
|
||||
const auto startedAt = std::chrono::steady_clock::now();
|
||||
const double queueWaitMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(startedAt - queuedAt).count();
|
||||
mHealthTelemetry.TryRecordOutputRenderQueueWait(queueWaitMilliseconds);
|
||||
mRenderCommandQueue.RequestOutputFrame({ context.videoState, context.completion });
|
||||
RenderOutputFrameRequest request;
|
||||
return mRenderCommandQueue.TryTakeOutputFrame(request) &&
|
||||
|
||||
@@ -209,6 +209,7 @@ private:
|
||||
OpenGLRenderPass mRenderPass;
|
||||
OpenGLRenderPipeline mRenderPipeline;
|
||||
OpenGLShaderPrograms mShaderPrograms;
|
||||
HealthTelemetry& mHealthTelemetry;
|
||||
HDC mHdc;
|
||||
HGLRC mHglrc;
|
||||
|
||||
|
||||
@@ -164,6 +164,9 @@ error:
|
||||
|
||||
void OpenGLComposite::paintGL(bool force)
|
||||
{
|
||||
if (mRuntimeUpdateController)
|
||||
mRuntimeUpdateController->ProcessRuntimeWork();
|
||||
|
||||
if (!force)
|
||||
{
|
||||
if (IsIconic(hGLWnd))
|
||||
@@ -171,6 +174,12 @@ void OpenGLComposite::paintGL(bool force)
|
||||
}
|
||||
|
||||
const unsigned previewFps = mRuntimeStore ? mRuntimeStore->GetConfiguredPreviewFps() : 30u;
|
||||
if (!force && mVideoBackend && mVideoBackend->ShouldPrioritizeOutputOverPreview())
|
||||
{
|
||||
ValidateRect(hGLWnd, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!mRenderEngine->TryPresentPreview(force, previewFps, mVideoBackend->OutputFrameWidth(), mVideoBackend->OutputFrameHeight()))
|
||||
{
|
||||
ValidateRect(hGLWnd, NULL);
|
||||
@@ -261,6 +270,9 @@ bool OpenGLComposite::Start()
|
||||
if (!mRenderEngine->StartRenderThread())
|
||||
return false;
|
||||
|
||||
if (mRuntimeUpdateController)
|
||||
mRuntimeUpdateController->ProcessRuntimeWork();
|
||||
|
||||
if (mVideoBackend->Start())
|
||||
return true;
|
||||
|
||||
@@ -351,9 +363,6 @@ bool OpenGLComposite::RequestScreenshot(std::string& error)
|
||||
|
||||
void OpenGLComposite::renderEffect()
|
||||
{
|
||||
if (mRuntimeUpdateController)
|
||||
mRuntimeUpdateController->ProcessRuntimeWork();
|
||||
|
||||
const RenderFrameInput frameInput = BuildRenderFrameInput();
|
||||
RenderFrame(frameInput);
|
||||
}
|
||||
|
||||
@@ -8,7 +8,9 @@
|
||||
#include <cstring>
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdlib>
|
||||
#include <gl/gl.h>
|
||||
#include <string>
|
||||
|
||||
OpenGLRenderPipeline::OpenGLRenderPipeline(
|
||||
OpenGLRenderer& renderer,
|
||||
@@ -22,7 +24,8 @@ OpenGLRenderPipeline::OpenGLRenderPipeline(
|
||||
mHealthTelemetry(healthTelemetry),
|
||||
mRenderEffect(renderEffect),
|
||||
mOutputReady(outputReady),
|
||||
mPaint(paint)
|
||||
mPaint(paint),
|
||||
mOutputReadbackMode(ReadOutputReadbackModeFromEnvironment())
|
||||
{
|
||||
}
|
||||
|
||||
@@ -53,9 +56,22 @@ bool OpenGLRenderPipeline::RenderFrame(const RenderPipelineFrameContext& context
|
||||
mHealthTelemetry.TryRecordPerformanceStats(state.frameBudgetMilliseconds, renderMilliseconds);
|
||||
mRuntimeSnapshotProvider.AdvanceFrame();
|
||||
|
||||
ReadOutputFrame(state, outputFrame);
|
||||
if (mPaint)
|
||||
mPaint();
|
||||
OutputReadbackTiming readbackTiming = ReadOutputFrame(state, outputFrame);
|
||||
mHealthTelemetry.TryRecordOutputRenderPipelineTiming(
|
||||
renderMilliseconds,
|
||||
readbackTiming.fenceWaitMilliseconds,
|
||||
readbackTiming.mapMilliseconds,
|
||||
readbackTiming.copyMilliseconds,
|
||||
readbackTiming.cachedCopyMilliseconds,
|
||||
readbackTiming.asyncQueueMilliseconds,
|
||||
readbackTiming.asyncQueueBufferMilliseconds,
|
||||
readbackTiming.asyncQueueSetupMilliseconds,
|
||||
readbackTiming.asyncQueueReadPixelsMilliseconds,
|
||||
readbackTiming.asyncQueueFenceMilliseconds,
|
||||
readbackTiming.syncReadMilliseconds,
|
||||
readbackTiming.asyncReadbackMissed,
|
||||
readbackTiming.cachedFallbackUsed,
|
||||
readbackTiming.syncFallbackUsed);
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -151,8 +167,9 @@ void OpenGLRenderPipeline::FlushAsyncReadbackPipeline()
|
||||
mAsyncReadbackReadIndex = 0;
|
||||
}
|
||||
|
||||
void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state)
|
||||
bool OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state, OutputReadbackTiming& timing)
|
||||
{
|
||||
const auto queueStartTime = std::chrono::steady_clock::now();
|
||||
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
|
||||
const std::size_t requiredBytes = static_cast<std::size_t>(state.outputFrameRowBytes) * state.outputFrameSize.height;
|
||||
const GLenum format = usePackedOutput ? GL_RGBA : GL_BGRA;
|
||||
@@ -161,8 +178,16 @@ void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state)
|
||||
const GLsizei readWidth = static_cast<GLsizei>(usePackedOutput ? state.outputPackTextureWidth : state.outputFrameSize.width);
|
||||
const GLsizei readHeight = static_cast<GLsizei>(state.outputFrameSize.height);
|
||||
|
||||
const auto finishTiming = [&timing, queueStartTime]() {
|
||||
const auto queueEndTime = std::chrono::steady_clock::now();
|
||||
timing.asyncQueueMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(queueEndTime - queueStartTime).count();
|
||||
};
|
||||
|
||||
if (requiredBytes == 0)
|
||||
return;
|
||||
{
|
||||
finishTiming();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (mAsyncReadbackBytes != requiredBytes
|
||||
|| mAsyncReadbackFormat != format
|
||||
@@ -173,30 +198,50 @@ void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state)
|
||||
mAsyncReadbackType = type;
|
||||
mAsyncReadbackFramebuffer = framebuffer;
|
||||
if (!EnsureAsyncReadbackBuffers(requiredBytes))
|
||||
return;
|
||||
{
|
||||
finishTiming();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
AsyncReadbackSlot& slot = mAsyncReadbackSlots[mAsyncReadbackWriteIndex];
|
||||
if (slot.fence != nullptr)
|
||||
if (slot.inFlight)
|
||||
{
|
||||
glDeleteSync(slot.fence);
|
||||
slot.fence = nullptr;
|
||||
finishTiming();
|
||||
return false;
|
||||
}
|
||||
|
||||
auto stageStartTime = std::chrono::steady_clock::now();
|
||||
glPixelStorei(GL_PACK_ALIGNMENT, 4);
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
|
||||
auto stageEndTime = std::chrono::steady_clock::now();
|
||||
timing.asyncQueueSetupMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
|
||||
|
||||
stageStartTime = std::chrono::steady_clock::now();
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(requiredBytes), nullptr, GL_STREAM_READ);
|
||||
stageEndTime = std::chrono::steady_clock::now();
|
||||
timing.asyncQueueBufferMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
|
||||
|
||||
stageStartTime = std::chrono::steady_clock::now();
|
||||
glReadPixels(0, 0, readWidth, readHeight, format, type, nullptr);
|
||||
stageEndTime = std::chrono::steady_clock::now();
|
||||
timing.asyncQueueReadPixelsMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
|
||||
|
||||
stageStartTime = std::chrono::steady_clock::now();
|
||||
slot.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
stageEndTime = std::chrono::steady_clock::now();
|
||||
timing.asyncQueueFenceMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
|
||||
slot.inFlight = slot.fence != nullptr;
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
|
||||
mAsyncReadbackWriteIndex = (mAsyncReadbackWriteIndex + 1) % mAsyncReadbackSlots.size();
|
||||
finishTiming();
|
||||
return slot.inFlight;
|
||||
}
|
||||
|
||||
bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds)
|
||||
bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds, OutputReadbackTiming& timing)
|
||||
{
|
||||
if (mAsyncReadbackBytes == 0 || outputFrame.bytes == nullptr)
|
||||
return false;
|
||||
@@ -206,15 +251,24 @@ bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFra
|
||||
return false;
|
||||
|
||||
const GLenum waitFlags = timeoutNanoseconds > 0 ? GL_SYNC_FLUSH_COMMANDS_BIT : 0;
|
||||
const auto waitStartTime = std::chrono::steady_clock::now();
|
||||
const GLenum waitResult = glClientWaitSync(slot.fence, waitFlags, timeoutNanoseconds);
|
||||
const auto waitEndTime = std::chrono::steady_clock::now();
|
||||
timing.fenceWaitMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(waitEndTime - waitStartTime).count();
|
||||
if (waitResult != GL_ALREADY_SIGNALED && waitResult != GL_CONDITION_SATISFIED)
|
||||
{
|
||||
timing.asyncReadbackMissed = true;
|
||||
return false;
|
||||
}
|
||||
|
||||
glDeleteSync(slot.fence);
|
||||
slot.fence = nullptr;
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
|
||||
const auto mapStartTime = std::chrono::steady_clock::now();
|
||||
void* mappedBytes = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
|
||||
const auto mapEndTime = std::chrono::steady_clock::now();
|
||||
timing.mapMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(mapEndTime - mapStartTime).count();
|
||||
if (mappedBytes == nullptr)
|
||||
{
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
@@ -223,7 +277,10 @@ bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFra
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto copyStartTime = std::chrono::steady_clock::now();
|
||||
std::memcpy(outputFrame.bytes, mappedBytes, slot.sizeBytes);
|
||||
const auto copyEndTime = std::chrono::steady_clock::now();
|
||||
timing.copyMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(copyEndTime - copyStartTime).count();
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
|
||||
@@ -243,8 +300,26 @@ void OpenGLRenderPipeline::CacheOutputFrame(const VideoIOOutputFrame& outputFram
|
||||
std::memcpy(mCachedOutputFrame.data(), outputFrame.bytes, byteCount);
|
||||
}
|
||||
|
||||
void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes)
|
||||
bool OpenGLRenderPipeline::TryCopyCachedOutputFrame(VideoIOOutputFrame& outputFrame, OutputReadbackTiming& timing) const
|
||||
{
|
||||
if (outputFrame.bytes == nullptr || outputFrame.height == 0 || outputFrame.rowBytes <= 0)
|
||||
return false;
|
||||
|
||||
const std::size_t byteCount = static_cast<std::size_t>(outputFrame.rowBytes) * outputFrame.height;
|
||||
if (mCachedOutputFrame.size() != byteCount)
|
||||
return false;
|
||||
|
||||
const auto copyStartTime = std::chrono::steady_clock::now();
|
||||
std::memcpy(outputFrame.bytes, mCachedOutputFrame.data(), byteCount);
|
||||
const auto copyEndTime = std::chrono::steady_clock::now();
|
||||
timing.cachedCopyMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(copyEndTime - copyStartTime).count();
|
||||
timing.cachedFallbackUsed = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes, OutputReadbackTiming& timing)
|
||||
{
|
||||
const auto readStartTime = std::chrono::steady_clock::now();
|
||||
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
|
||||
|
||||
glPixelStorei(GL_PACK_ALIGNMENT, 4);
|
||||
@@ -259,24 +334,78 @@ void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& stat
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputFramebuffer());
|
||||
glReadPixels(0, 0, state.outputFrameSize.width, state.outputFrameSize.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, destinationBytes);
|
||||
}
|
||||
const auto readEndTime = std::chrono::steady_clock::now();
|
||||
timing.syncReadMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(readEndTime - readStartTime).count();
|
||||
timing.syncFallbackUsed = true;
|
||||
}
|
||||
|
||||
void OpenGLRenderPipeline::ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame)
|
||||
OpenGLRenderPipeline::OutputReadbackTiming OpenGLRenderPipeline::ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame)
|
||||
{
|
||||
if (TryConsumeAsyncReadback(outputFrame, 500000))
|
||||
OutputReadbackTiming timing;
|
||||
|
||||
if (mOutputReadbackMode == OutputReadbackMode::Synchronous)
|
||||
{
|
||||
QueueAsyncReadback(state);
|
||||
return;
|
||||
if (outputFrame.bytes != nullptr)
|
||||
{
|
||||
ReadOutputFrameSynchronously(state, outputFrame.bytes, timing);
|
||||
CacheOutputFrame(outputFrame);
|
||||
}
|
||||
return timing;
|
||||
}
|
||||
|
||||
// If async readback misses the playout deadline, prefer a fresh synchronous
|
||||
// frame over reusing stale cached output, then restart the async pipeline.
|
||||
if (outputFrame.bytes != nullptr)
|
||||
if (mOutputReadbackMode == OutputReadbackMode::CachedOnly)
|
||||
{
|
||||
ReadOutputFrameSynchronously(state, outputFrame.bytes);
|
||||
if (TryCopyCachedOutputFrame(outputFrame, timing))
|
||||
return timing;
|
||||
|
||||
if (outputFrame.bytes != nullptr)
|
||||
{
|
||||
ReadOutputFrameSynchronously(state, outputFrame.bytes, timing);
|
||||
CacheOutputFrame(outputFrame);
|
||||
}
|
||||
return timing;
|
||||
}
|
||||
|
||||
if (TryConsumeAsyncReadback(outputFrame, 500000, timing))
|
||||
{
|
||||
(void)QueueAsyncReadback(state, timing);
|
||||
return timing;
|
||||
}
|
||||
|
||||
if (TryCopyCachedOutputFrame(outputFrame, timing))
|
||||
{
|
||||
(void)QueueAsyncReadback(state, timing);
|
||||
return timing;
|
||||
}
|
||||
|
||||
// Bootstrap only: until the first async readback has produced cached output,
|
||||
// use one synchronous readback so DeckLink has a valid frame to schedule.
|
||||
if (outputFrame.bytes != nullptr && mCachedOutputFrame.empty())
|
||||
{
|
||||
ReadOutputFrameSynchronously(state, outputFrame.bytes, timing);
|
||||
CacheOutputFrame(outputFrame);
|
||||
}
|
||||
|
||||
FlushAsyncReadbackPipeline();
|
||||
QueueAsyncReadback(state);
|
||||
(void)QueueAsyncReadback(state, timing);
|
||||
return timing;
|
||||
}
|
||||
|
||||
OpenGLRenderPipeline::OutputReadbackMode OpenGLRenderPipeline::ReadOutputReadbackModeFromEnvironment()
|
||||
{
|
||||
char* mode = nullptr;
|
||||
std::size_t modeSize = 0;
|
||||
if (_dupenv_s(&mode, &modeSize, "VST_OUTPUT_READBACK_MODE") != 0 || mode == nullptr)
|
||||
return OutputReadbackMode::AsyncPbo;
|
||||
|
||||
const std::string modeValue(mode);
|
||||
std::free(mode);
|
||||
if (modeValue == "async_pbo")
|
||||
return OutputReadbackMode::AsyncPbo;
|
||||
if (modeValue == "sync")
|
||||
return OutputReadbackMode::Synchronous;
|
||||
if (modeValue == "cached_only")
|
||||
return OutputReadbackMode::CachedOnly;
|
||||
|
||||
return OutputReadbackMode::AsyncPbo;
|
||||
}
|
||||
|
||||
@@ -36,6 +36,13 @@ public:
|
||||
bool RenderFrame(const RenderPipelineFrameContext& context, VideoIOOutputFrame& outputFrame);
|
||||
|
||||
private:
|
||||
enum class OutputReadbackMode
|
||||
{
|
||||
AsyncPbo,
|
||||
Synchronous,
|
||||
CachedOnly
|
||||
};
|
||||
|
||||
struct AsyncReadbackSlot
|
||||
{
|
||||
GLuint pixelPackBuffer = 0;
|
||||
@@ -44,15 +51,34 @@ private:
|
||||
bool inFlight = false;
|
||||
};
|
||||
|
||||
struct OutputReadbackTiming
|
||||
{
|
||||
double fenceWaitMilliseconds = 0.0;
|
||||
double mapMilliseconds = 0.0;
|
||||
double copyMilliseconds = 0.0;
|
||||
double cachedCopyMilliseconds = 0.0;
|
||||
double asyncQueueMilliseconds = 0.0;
|
||||
double asyncQueueBufferMilliseconds = 0.0;
|
||||
double asyncQueueSetupMilliseconds = 0.0;
|
||||
double asyncQueueReadPixelsMilliseconds = 0.0;
|
||||
double asyncQueueFenceMilliseconds = 0.0;
|
||||
double syncReadMilliseconds = 0.0;
|
||||
bool asyncReadbackMissed = false;
|
||||
bool cachedFallbackUsed = false;
|
||||
bool syncFallbackUsed = false;
|
||||
};
|
||||
|
||||
bool EnsureAsyncReadbackBuffers(std::size_t requiredBytes);
|
||||
void ResetAsyncReadbackState();
|
||||
void FlushAsyncReadbackPipeline();
|
||||
void QueueAsyncReadback(const VideoIOState& state);
|
||||
bool TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds);
|
||||
bool QueueAsyncReadback(const VideoIOState& state, OutputReadbackTiming& timing);
|
||||
bool TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds, OutputReadbackTiming& timing);
|
||||
void CacheOutputFrame(const VideoIOOutputFrame& outputFrame);
|
||||
void ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes);
|
||||
bool TryCopyCachedOutputFrame(VideoIOOutputFrame& outputFrame, OutputReadbackTiming& timing) const;
|
||||
void ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes, OutputReadbackTiming& timing);
|
||||
void PackOutputFor10Bit(const VideoIOState& state);
|
||||
void ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame);
|
||||
OutputReadbackTiming ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame);
|
||||
static OutputReadbackMode ReadOutputReadbackModeFromEnvironment();
|
||||
|
||||
OpenGLRenderer& mRenderer;
|
||||
RuntimeSnapshotProvider& mRuntimeSnapshotProvider;
|
||||
@@ -60,6 +86,7 @@ private:
|
||||
RenderEffectCallback mRenderEffect;
|
||||
OutputReadyCallback mOutputReady;
|
||||
PaintCallback mPaint;
|
||||
OutputReadbackMode mOutputReadbackMode = OutputReadbackMode::AsyncPbo;
|
||||
std::array<AsyncReadbackSlot, 3> mAsyncReadbackSlots;
|
||||
std::size_t mAsyncReadbackWriteIndex = 0;
|
||||
std::size_t mAsyncReadbackReadIndex = 0;
|
||||
|
||||
@@ -95,6 +95,24 @@ JsonValue RuntimeStatePresenter::BuildRuntimeStateValue(const RuntimeStore& runt
|
||||
outputRender.set("renderMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderMilliseconds));
|
||||
outputRender.set("smoothedRenderMs", JsonValue(telemetrySnapshot.backendPlayout.smoothedOutputRenderMilliseconds));
|
||||
outputRender.set("maxRenderMs", JsonValue(telemetrySnapshot.backendPlayout.maxOutputRenderMilliseconds));
|
||||
outputRender.set("acquireFrameMs", JsonValue(telemetrySnapshot.backendPlayout.outputFrameAcquireMilliseconds));
|
||||
outputRender.set("renderRequestMs", JsonValue(telemetrySnapshot.backendPlayout.outputFrameRenderRequestMilliseconds));
|
||||
outputRender.set("endAccessMs", JsonValue(telemetrySnapshot.backendPlayout.outputFrameEndAccessMilliseconds));
|
||||
outputRender.set("queueWaitMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderQueueWaitMilliseconds));
|
||||
outputRender.set("drawMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderDrawMilliseconds));
|
||||
outputRender.set("fenceWaitMs", JsonValue(telemetrySnapshot.backendPlayout.outputReadbackFenceWaitMilliseconds));
|
||||
outputRender.set("mapMs", JsonValue(telemetrySnapshot.backendPlayout.outputReadbackMapMilliseconds));
|
||||
outputRender.set("readbackCopyMs", JsonValue(telemetrySnapshot.backendPlayout.outputReadbackCopyMilliseconds));
|
||||
outputRender.set("cachedCopyMs", JsonValue(telemetrySnapshot.backendPlayout.outputCachedCopyMilliseconds));
|
||||
outputRender.set("asyncQueueMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueMilliseconds));
|
||||
outputRender.set("asyncQueueBufferMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueBufferMilliseconds));
|
||||
outputRender.set("asyncQueueSetupMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueSetupMilliseconds));
|
||||
outputRender.set("asyncQueueReadPixelsMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueReadPixelsMilliseconds));
|
||||
outputRender.set("asyncQueueFenceMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueFenceMilliseconds));
|
||||
outputRender.set("syncReadMs", JsonValue(telemetrySnapshot.backendPlayout.outputSyncReadMilliseconds));
|
||||
outputRender.set("asyncReadbackMissCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.outputAsyncReadbackMissCount)));
|
||||
outputRender.set("cachedFallbackCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.outputCachedFallbackCount)));
|
||||
outputRender.set("syncFallbackCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.outputSyncFallbackCount)));
|
||||
|
||||
JsonValue recovery = JsonValue::MakeObject();
|
||||
recovery.set("completionResult", JsonValue(telemetrySnapshot.backendPlayout.completionResult));
|
||||
|
||||
@@ -212,6 +212,7 @@ void HealthTelemetry::RecordBackendPlayoutHealth(const std::string& lifecycleSta
|
||||
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
|
||||
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
|
||||
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
|
||||
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
|
||||
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
|
||||
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
|
||||
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
|
||||
@@ -232,6 +233,9 @@ void HealthTelemetry::RecordBackendPlayoutHealth(const std::string& lifecycleSta
|
||||
mBackendPlayout.outputRenderMilliseconds = std::max(outputRenderMilliseconds, 0.0);
|
||||
mBackendPlayout.smoothedOutputRenderMilliseconds = std::max(smoothedOutputRenderMilliseconds, 0.0);
|
||||
mBackendPlayout.maxOutputRenderMilliseconds = std::max(maxOutputRenderMilliseconds, 0.0);
|
||||
mBackendPlayout.outputFrameAcquireMilliseconds = std::max(outputFrameAcquireMilliseconds, 0.0);
|
||||
mBackendPlayout.outputFrameRenderRequestMilliseconds = std::max(outputFrameRenderRequestMilliseconds, 0.0);
|
||||
mBackendPlayout.outputFrameEndAccessMilliseconds = std::max(outputFrameEndAccessMilliseconds, 0.0);
|
||||
mBackendPlayout.completedFrameIndex = completedFrameIndex;
|
||||
mBackendPlayout.scheduledFrameIndex = scheduledFrameIndex;
|
||||
mBackendPlayout.scheduledLeadFrames = scheduledLeadFrames;
|
||||
@@ -251,6 +255,7 @@ bool HealthTelemetry::TryRecordBackendPlayoutHealth(const std::string& lifecycle
|
||||
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
|
||||
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
|
||||
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
|
||||
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
|
||||
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
|
||||
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
|
||||
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
|
||||
@@ -274,6 +279,9 @@ bool HealthTelemetry::TryRecordBackendPlayoutHealth(const std::string& lifecycle
|
||||
mBackendPlayout.outputRenderMilliseconds = std::max(outputRenderMilliseconds, 0.0);
|
||||
mBackendPlayout.smoothedOutputRenderMilliseconds = std::max(smoothedOutputRenderMilliseconds, 0.0);
|
||||
mBackendPlayout.maxOutputRenderMilliseconds = std::max(maxOutputRenderMilliseconds, 0.0);
|
||||
mBackendPlayout.outputFrameAcquireMilliseconds = std::max(outputFrameAcquireMilliseconds, 0.0);
|
||||
mBackendPlayout.outputFrameRenderRequestMilliseconds = std::max(outputFrameRenderRequestMilliseconds, 0.0);
|
||||
mBackendPlayout.outputFrameEndAccessMilliseconds = std::max(outputFrameEndAccessMilliseconds, 0.0);
|
||||
mBackendPlayout.completedFrameIndex = completedFrameIndex;
|
||||
mBackendPlayout.scheduledFrameIndex = scheduledFrameIndex;
|
||||
mBackendPlayout.scheduledLeadFrames = scheduledLeadFrames;
|
||||
@@ -289,6 +297,98 @@ bool HealthTelemetry::TryRecordBackendPlayoutHealth(const std::string& lifecycle
|
||||
return true;
|
||||
}
|
||||
|
||||
void HealthTelemetry::RecordOutputRenderQueueWait(double queueWaitMilliseconds)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mMutex);
|
||||
mBackendPlayout.outputRenderQueueWaitMilliseconds = std::max(queueWaitMilliseconds, 0.0);
|
||||
}
|
||||
|
||||
bool HealthTelemetry::TryRecordOutputRenderQueueWait(double queueWaitMilliseconds)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mMutex, std::try_to_lock);
|
||||
if (!lock.owns_lock())
|
||||
return false;
|
||||
|
||||
mBackendPlayout.outputRenderQueueWaitMilliseconds = std::max(queueWaitMilliseconds, 0.0);
|
||||
return true;
|
||||
}
|
||||
|
||||
void HealthTelemetry::RecordOutputRenderPipelineTiming(
|
||||
double drawMilliseconds,
|
||||
double fenceWaitMilliseconds,
|
||||
double mapMilliseconds,
|
||||
double readbackCopyMilliseconds,
|
||||
double cachedCopyMilliseconds,
|
||||
double asyncQueueMilliseconds,
|
||||
double asyncQueueBufferMilliseconds,
|
||||
double asyncQueueSetupMilliseconds,
|
||||
double asyncQueueReadPixelsMilliseconds,
|
||||
double asyncQueueFenceMilliseconds,
|
||||
double syncReadMilliseconds,
|
||||
bool asyncReadbackMissed,
|
||||
bool cachedFallbackUsed,
|
||||
bool syncFallbackUsed)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mMutex);
|
||||
mBackendPlayout.outputRenderDrawMilliseconds = std::max(drawMilliseconds, 0.0);
|
||||
mBackendPlayout.outputReadbackFenceWaitMilliseconds = std::max(fenceWaitMilliseconds, 0.0);
|
||||
mBackendPlayout.outputReadbackMapMilliseconds = std::max(mapMilliseconds, 0.0);
|
||||
mBackendPlayout.outputReadbackCopyMilliseconds = std::max(readbackCopyMilliseconds, 0.0);
|
||||
mBackendPlayout.outputCachedCopyMilliseconds = std::max(cachedCopyMilliseconds, 0.0);
|
||||
mBackendPlayout.outputAsyncQueueMilliseconds = std::max(asyncQueueMilliseconds, 0.0);
|
||||
mBackendPlayout.outputAsyncQueueBufferMilliseconds = std::max(asyncQueueBufferMilliseconds, 0.0);
|
||||
mBackendPlayout.outputAsyncQueueSetupMilliseconds = std::max(asyncQueueSetupMilliseconds, 0.0);
|
||||
mBackendPlayout.outputAsyncQueueReadPixelsMilliseconds = std::max(asyncQueueReadPixelsMilliseconds, 0.0);
|
||||
mBackendPlayout.outputAsyncQueueFenceMilliseconds = std::max(asyncQueueFenceMilliseconds, 0.0);
|
||||
mBackendPlayout.outputSyncReadMilliseconds = std::max(syncReadMilliseconds, 0.0);
|
||||
if (asyncReadbackMissed)
|
||||
++mBackendPlayout.outputAsyncReadbackMissCount;
|
||||
if (cachedFallbackUsed)
|
||||
++mBackendPlayout.outputCachedFallbackCount;
|
||||
if (syncFallbackUsed)
|
||||
++mBackendPlayout.outputSyncFallbackCount;
|
||||
}
|
||||
|
||||
bool HealthTelemetry::TryRecordOutputRenderPipelineTiming(
|
||||
double drawMilliseconds,
|
||||
double fenceWaitMilliseconds,
|
||||
double mapMilliseconds,
|
||||
double readbackCopyMilliseconds,
|
||||
double cachedCopyMilliseconds,
|
||||
double asyncQueueMilliseconds,
|
||||
double asyncQueueBufferMilliseconds,
|
||||
double asyncQueueSetupMilliseconds,
|
||||
double asyncQueueReadPixelsMilliseconds,
|
||||
double asyncQueueFenceMilliseconds,
|
||||
double syncReadMilliseconds,
|
||||
bool asyncReadbackMissed,
|
||||
bool cachedFallbackUsed,
|
||||
bool syncFallbackUsed)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mMutex, std::try_to_lock);
|
||||
if (!lock.owns_lock())
|
||||
return false;
|
||||
|
||||
mBackendPlayout.outputRenderDrawMilliseconds = std::max(drawMilliseconds, 0.0);
|
||||
mBackendPlayout.outputReadbackFenceWaitMilliseconds = std::max(fenceWaitMilliseconds, 0.0);
|
||||
mBackendPlayout.outputReadbackMapMilliseconds = std::max(mapMilliseconds, 0.0);
|
||||
mBackendPlayout.outputReadbackCopyMilliseconds = std::max(readbackCopyMilliseconds, 0.0);
|
||||
mBackendPlayout.outputCachedCopyMilliseconds = std::max(cachedCopyMilliseconds, 0.0);
|
||||
mBackendPlayout.outputAsyncQueueMilliseconds = std::max(asyncQueueMilliseconds, 0.0);
|
||||
mBackendPlayout.outputAsyncQueueBufferMilliseconds = std::max(asyncQueueBufferMilliseconds, 0.0);
|
||||
mBackendPlayout.outputAsyncQueueSetupMilliseconds = std::max(asyncQueueSetupMilliseconds, 0.0);
|
||||
mBackendPlayout.outputAsyncQueueReadPixelsMilliseconds = std::max(asyncQueueReadPixelsMilliseconds, 0.0);
|
||||
mBackendPlayout.outputAsyncQueueFenceMilliseconds = std::max(asyncQueueFenceMilliseconds, 0.0);
|
||||
mBackendPlayout.outputSyncReadMilliseconds = std::max(syncReadMilliseconds, 0.0);
|
||||
if (asyncReadbackMissed)
|
||||
++mBackendPlayout.outputAsyncReadbackMissCount;
|
||||
if (cachedFallbackUsed)
|
||||
++mBackendPlayout.outputCachedFallbackCount;
|
||||
if (syncFallbackUsed)
|
||||
++mBackendPlayout.outputSyncFallbackCount;
|
||||
return true;
|
||||
}
|
||||
|
||||
HealthTelemetry::SignalStatusSnapshot HealthTelemetry::GetSignalStatusSnapshot() const
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mMutex);
|
||||
|
||||
@@ -97,6 +97,24 @@ public:
|
||||
double outputRenderMilliseconds = 0.0;
|
||||
double smoothedOutputRenderMilliseconds = 0.0;
|
||||
double maxOutputRenderMilliseconds = 0.0;
|
||||
double outputFrameAcquireMilliseconds = 0.0;
|
||||
double outputFrameRenderRequestMilliseconds = 0.0;
|
||||
double outputFrameEndAccessMilliseconds = 0.0;
|
||||
double outputRenderQueueWaitMilliseconds = 0.0;
|
||||
double outputRenderDrawMilliseconds = 0.0;
|
||||
double outputReadbackFenceWaitMilliseconds = 0.0;
|
||||
double outputReadbackMapMilliseconds = 0.0;
|
||||
double outputReadbackCopyMilliseconds = 0.0;
|
||||
double outputCachedCopyMilliseconds = 0.0;
|
||||
double outputAsyncQueueMilliseconds = 0.0;
|
||||
double outputAsyncQueueBufferMilliseconds = 0.0;
|
||||
double outputAsyncQueueSetupMilliseconds = 0.0;
|
||||
double outputAsyncQueueReadPixelsMilliseconds = 0.0;
|
||||
double outputAsyncQueueFenceMilliseconds = 0.0;
|
||||
double outputSyncReadMilliseconds = 0.0;
|
||||
uint64_t outputAsyncReadbackMissCount = 0;
|
||||
uint64_t outputCachedFallbackCount = 0;
|
||||
uint64_t outputSyncFallbackCount = 0;
|
||||
uint64_t completedFrameIndex = 0;
|
||||
uint64_t scheduledFrameIndex = 0;
|
||||
uint64_t scheduledLeadFrames = 0;
|
||||
@@ -161,6 +179,7 @@ public:
|
||||
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
|
||||
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
|
||||
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
|
||||
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
|
||||
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
|
||||
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
|
||||
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
|
||||
@@ -170,11 +189,46 @@ public:
|
||||
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
|
||||
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
|
||||
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
|
||||
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
|
||||
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
|
||||
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
|
||||
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
|
||||
bool degraded, const std::string& statusMessage);
|
||||
|
||||
void RecordOutputRenderQueueWait(double queueWaitMilliseconds);
|
||||
bool TryRecordOutputRenderQueueWait(double queueWaitMilliseconds);
|
||||
|
||||
void RecordOutputRenderPipelineTiming(
|
||||
double drawMilliseconds,
|
||||
double fenceWaitMilliseconds,
|
||||
double mapMilliseconds,
|
||||
double readbackCopyMilliseconds,
|
||||
double cachedCopyMilliseconds,
|
||||
double asyncQueueMilliseconds,
|
||||
double asyncQueueBufferMilliseconds,
|
||||
double asyncQueueSetupMilliseconds,
|
||||
double asyncQueueReadPixelsMilliseconds,
|
||||
double asyncQueueFenceMilliseconds,
|
||||
double syncReadMilliseconds,
|
||||
bool asyncReadbackMissed,
|
||||
bool cachedFallbackUsed,
|
||||
bool syncFallbackUsed);
|
||||
bool TryRecordOutputRenderPipelineTiming(
|
||||
double drawMilliseconds,
|
||||
double fenceWaitMilliseconds,
|
||||
double mapMilliseconds,
|
||||
double readbackCopyMilliseconds,
|
||||
double cachedCopyMilliseconds,
|
||||
double asyncQueueMilliseconds,
|
||||
double asyncQueueBufferMilliseconds,
|
||||
double asyncQueueSetupMilliseconds,
|
||||
double asyncQueueReadPixelsMilliseconds,
|
||||
double asyncQueueFenceMilliseconds,
|
||||
double syncReadMilliseconds,
|
||||
bool asyncReadbackMissed,
|
||||
bool cachedFallbackUsed,
|
||||
bool syncFallbackUsed);
|
||||
|
||||
SignalStatusSnapshot GetSignalStatusSnapshot() const;
|
||||
VideoIOStatusSnapshot GetVideoIOStatusSnapshot() const;
|
||||
PerformanceSnapshot GetPerformanceSnapshot() const;
|
||||
|
||||
@@ -9,12 +9,14 @@
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
#include <windows.h>
|
||||
|
||||
VideoBackend::VideoBackend(RenderEngine& renderEngine, HealthTelemetry& healthTelemetry, RuntimeEventDispatcher& runtimeEventDispatcher) :
|
||||
mHealthTelemetry(healthTelemetry),
|
||||
mRuntimeEventDispatcher(runtimeEventDispatcher),
|
||||
mPlayoutPolicy(NormalizeVideoPlayoutPolicy(VideoPlayoutPolicy())),
|
||||
mOutputProductionController(mPlayoutPolicy),
|
||||
mReadyOutputQueue(mPlayoutPolicy),
|
||||
mVideoIODevice(std::make_unique<DeckLinkSession>()),
|
||||
mBridge(std::make_unique<OpenGLVideoIOBridge>(renderEngine))
|
||||
@@ -80,6 +82,7 @@ bool VideoBackend::ConfigureInput(const VideoFormat& inputVideoMode, std::string
|
||||
bool VideoBackend::ConfigureOutput(const VideoFormat& outputVideoMode, bool externalKeyingEnabled, std::string& error)
|
||||
{
|
||||
mPlayoutPolicy = NormalizeVideoPlayoutPolicy(mPlayoutPolicy);
|
||||
mOutputProductionController.Configure(mPlayoutPolicy);
|
||||
mReadyOutputQueue.Configure(mPlayoutPolicy);
|
||||
if (mLifecycle.State() != VideoBackendLifecycleState::Configuring)
|
||||
ApplyLifecycleTransition(VideoBackendLifecycleState::Configuring, "Configuring video backend output.");
|
||||
@@ -101,7 +104,10 @@ bool VideoBackend::Start()
|
||||
StartOutputCompletionWorker();
|
||||
const bool started = mVideoIODevice->Start();
|
||||
if (started)
|
||||
{
|
||||
StartOutputProducerWorker();
|
||||
ApplyLifecycleTransition(VideoBackendLifecycleState::Running, "Video backend started.");
|
||||
}
|
||||
else
|
||||
{
|
||||
StopOutputCompletionWorker();
|
||||
@@ -113,6 +119,7 @@ bool VideoBackend::Start()
|
||||
bool VideoBackend::Stop()
|
||||
{
|
||||
ApplyLifecycleTransition(VideoBackendLifecycleState::Stopping, "Video backend stopping.");
|
||||
StopOutputProducerWorker();
|
||||
const bool stopped = mVideoIODevice->Stop();
|
||||
StopOutputCompletionWorker();
|
||||
if (stopped)
|
||||
@@ -232,6 +239,12 @@ const std::string& VideoBackend::StatusMessage() const
|
||||
return mVideoIODevice->StatusMessage();
|
||||
}
|
||||
|
||||
bool VideoBackend::ShouldPrioritizeOutputOverPreview() const
|
||||
{
|
||||
const RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
|
||||
return metrics.depth < static_cast<std::size_t>(mPlayoutPolicy.targetReadyFrames);
|
||||
}
|
||||
|
||||
void VideoBackend::SetStatusMessage(const std::string& message)
|
||||
{
|
||||
mVideoIODevice->SetStatusMessage(message);
|
||||
@@ -288,27 +301,36 @@ void VideoBackend::HandleOutputFrameCompletion(const VideoIOCompletion& completi
|
||||
|
||||
void VideoBackend::StartOutputCompletionWorker()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mOutputCompletionMutex);
|
||||
if (mOutputCompletionWorkerRunning)
|
||||
return;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mOutputCompletionMutex);
|
||||
if (mOutputCompletionWorkerRunning)
|
||||
return;
|
||||
|
||||
mPendingOutputCompletions.clear();
|
||||
mReadyOutputQueue.Clear();
|
||||
mNextReadyOutputFrameIndex = 0;
|
||||
mHasReadyQueueDepthBaseline = false;
|
||||
mMinReadyQueueDepth = 0;
|
||||
mMaxReadyQueueDepth = 0;
|
||||
mReadyQueueZeroDepthCount = 0;
|
||||
mOutputRenderMilliseconds = 0.0;
|
||||
mSmoothedOutputRenderMilliseconds = 0.0;
|
||||
mMaxOutputRenderMilliseconds = 0.0;
|
||||
mOutputCompletionWorkerStopping = false;
|
||||
mOutputCompletionWorkerRunning = true;
|
||||
mOutputCompletionWorker = std::thread(&VideoBackend::OutputCompletionWorkerMain, this);
|
||||
mPendingOutputCompletions.clear();
|
||||
mReadyOutputQueue.Clear();
|
||||
mNextReadyOutputFrameIndex = 0;
|
||||
mHasReadyQueueDepthBaseline = false;
|
||||
mMinReadyQueueDepth = 0;
|
||||
mMaxReadyQueueDepth = 0;
|
||||
mReadyQueueZeroDepthCount = 0;
|
||||
mOutputRenderMilliseconds = 0.0;
|
||||
mSmoothedOutputRenderMilliseconds = 0.0;
|
||||
mMaxOutputRenderMilliseconds = 0.0;
|
||||
mOutputFrameAcquireMilliseconds = 0.0;
|
||||
mOutputFrameRenderRequestMilliseconds = 0.0;
|
||||
mOutputFrameEndAccessMilliseconds = 0.0;
|
||||
mLastLateStreak = 0;
|
||||
mLastDropStreak = 0;
|
||||
mOutputCompletionWorkerStopping = false;
|
||||
mOutputCompletionWorkerRunning = true;
|
||||
mOutputCompletionWorker = std::thread(&VideoBackend::OutputCompletionWorkerMain, this);
|
||||
}
|
||||
}
|
||||
|
||||
void VideoBackend::StopOutputCompletionWorker()
|
||||
{
|
||||
StopOutputProducerWorker();
|
||||
|
||||
bool shouldJoin = false;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mOutputCompletionMutex);
|
||||
@@ -322,6 +344,40 @@ void VideoBackend::StopOutputCompletionWorker()
|
||||
mOutputCompletionWorker.join();
|
||||
}
|
||||
|
||||
void VideoBackend::StartOutputProducerWorker()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mOutputProducerMutex);
|
||||
if (mOutputProducerWorkerRunning)
|
||||
return;
|
||||
|
||||
mLastOutputProductionCompletion = VideoIOCompletion();
|
||||
mLastOutputProductionTime = std::chrono::steady_clock::time_point();
|
||||
mOutputProducerWorkerStopping = false;
|
||||
mOutputProducerWorkerRunning = true;
|
||||
mOutputProducerWorker = std::thread(&VideoBackend::OutputProducerWorkerMain, this);
|
||||
mOutputProducerCondition.notify_one();
|
||||
}
|
||||
|
||||
void VideoBackend::StopOutputProducerWorker()
|
||||
{
|
||||
bool shouldJoin = false;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mOutputProducerMutex);
|
||||
if (mOutputProducerWorkerRunning)
|
||||
mOutputProducerWorkerStopping = true;
|
||||
shouldJoin = mOutputProducerWorker.joinable();
|
||||
}
|
||||
mOutputProducerCondition.notify_one();
|
||||
|
||||
if (shouldJoin)
|
||||
mOutputProducerWorker.join();
|
||||
}
|
||||
|
||||
void VideoBackend::NotifyOutputProducer()
|
||||
{
|
||||
mOutputProducerCondition.notify_one();
|
||||
}
|
||||
|
||||
void VideoBackend::OutputCompletionWorkerMain()
|
||||
{
|
||||
for (;;)
|
||||
@@ -351,6 +407,57 @@ void VideoBackend::OutputCompletionWorkerMain()
|
||||
}
|
||||
}
|
||||
|
||||
void VideoBackend::OutputProducerWorkerMain()
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mOutputProducerMutex);
|
||||
mOutputProducerCondition.wait_for(lock, OutputProducerWakeInterval());
|
||||
|
||||
if (mOutputProducerWorkerStopping)
|
||||
{
|
||||
mOutputProducerWorkerRunning = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
|
||||
RecordReadyQueueDepthSample(metrics);
|
||||
const OutputProductionDecision decision = mOutputProductionController.Decide(BuildOutputProductionPressure(metrics));
|
||||
if (decision.action != OutputProductionAction::Produce || decision.requestedFrames == 0)
|
||||
continue;
|
||||
|
||||
VideoIOCompletion completion;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mOutputProducerMutex);
|
||||
if (mOutputProducerWorkerStopping)
|
||||
continue;
|
||||
completion = mLastOutputProductionCompletion;
|
||||
}
|
||||
|
||||
const auto now = std::chrono::steady_clock::now();
|
||||
if (mLastOutputProductionTime != std::chrono::steady_clock::time_point() &&
|
||||
now - mLastOutputProductionTime < OutputProducerWakeInterval())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ProduceReadyOutputFrames(completion, 1) > 0)
|
||||
mLastOutputProductionTime = std::chrono::steady_clock::now();
|
||||
}
|
||||
}
|
||||
|
||||
std::chrono::milliseconds VideoBackend::OutputProducerWakeInterval() const
|
||||
{
|
||||
const double frameBudgetMilliseconds = State().frameBudgetMilliseconds;
|
||||
if (frameBudgetMilliseconds <= 0.0)
|
||||
return std::chrono::milliseconds(8);
|
||||
|
||||
const int intervalMilliseconds = (std::max)(1, static_cast<int>(std::floor(frameBudgetMilliseconds * 0.75)));
|
||||
return std::chrono::milliseconds(intervalMilliseconds);
|
||||
}
|
||||
|
||||
void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& completion)
|
||||
{
|
||||
RecordFramePacing(completion.result);
|
||||
@@ -358,31 +465,69 @@ void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& complet
|
||||
const RenderOutputQueueMetrics initialQueueMetrics = mReadyOutputQueue.GetMetrics();
|
||||
RecordReadyQueueDepthSample(initialQueueMetrics);
|
||||
const VideoPlayoutRecoveryDecision recoveryDecision = AccountForCompletionResult(completion.result, initialQueueMetrics.depth);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
|
||||
mLastLateStreak = recoveryDecision.lateStreak;
|
||||
mLastDropStreak = recoveryDecision.dropStreak;
|
||||
}
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mOutputProducerMutex);
|
||||
mLastOutputProductionCompletion = completion;
|
||||
}
|
||||
NotifyOutputProducer();
|
||||
|
||||
FillReadyOutputQueue(completion);
|
||||
if (!ScheduleReadyOutputFrame())
|
||||
if (!ScheduleReadyOutputFrame() &&
|
||||
(ProduceReadyOutputFrames(completion, 1) == 0 || !ScheduleReadyOutputFrame()))
|
||||
{
|
||||
ScheduleBlackUnderrunFrame();
|
||||
}
|
||||
NotifyOutputProducer();
|
||||
RecordBackendPlayoutHealth(completion.result, recoveryDecision);
|
||||
}
|
||||
|
||||
void VideoBackend::RecordBackendPlayoutHealth(VideoIOCompletionResult result, const VideoPlayoutRecoveryDecision& recoveryDecision)
|
||||
{
|
||||
const RenderOutputQueueMetrics queueMetrics = mReadyOutputQueue.GetMetrics();
|
||||
std::size_t minReadyQueueDepth = 0;
|
||||
std::size_t maxReadyQueueDepth = 0;
|
||||
uint64_t readyQueueZeroDepthCount = 0;
|
||||
double outputRenderMilliseconds = 0.0;
|
||||
double smoothedOutputRenderMilliseconds = 0.0;
|
||||
double maxOutputRenderMilliseconds = 0.0;
|
||||
double outputFrameAcquireMilliseconds = 0.0;
|
||||
double outputFrameRenderRequestMilliseconds = 0.0;
|
||||
double outputFrameEndAccessMilliseconds = 0.0;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
|
||||
minReadyQueueDepth = mMinReadyQueueDepth;
|
||||
maxReadyQueueDepth = mMaxReadyQueueDepth;
|
||||
readyQueueZeroDepthCount = mReadyQueueZeroDepthCount;
|
||||
outputRenderMilliseconds = mOutputRenderMilliseconds;
|
||||
smoothedOutputRenderMilliseconds = mSmoothedOutputRenderMilliseconds;
|
||||
maxOutputRenderMilliseconds = mMaxOutputRenderMilliseconds;
|
||||
outputFrameAcquireMilliseconds = mOutputFrameAcquireMilliseconds;
|
||||
outputFrameRenderRequestMilliseconds = mOutputFrameRenderRequestMilliseconds;
|
||||
outputFrameEndAccessMilliseconds = mOutputFrameEndAccessMilliseconds;
|
||||
}
|
||||
|
||||
mHealthTelemetry.TryRecordBackendPlayoutHealth(
|
||||
VideoBackendLifecycle::StateName(mLifecycle.State()),
|
||||
CompletionResultName(result),
|
||||
queueMetrics.depth,
|
||||
queueMetrics.capacity,
|
||||
queueMetrics.pushedCount,
|
||||
mMinReadyQueueDepth,
|
||||
mMaxReadyQueueDepth,
|
||||
mReadyQueueZeroDepthCount,
|
||||
minReadyQueueDepth,
|
||||
maxReadyQueueDepth,
|
||||
readyQueueZeroDepthCount,
|
||||
queueMetrics.poppedCount,
|
||||
queueMetrics.droppedCount,
|
||||
queueMetrics.underrunCount,
|
||||
mOutputRenderMilliseconds,
|
||||
mSmoothedOutputRenderMilliseconds,
|
||||
mMaxOutputRenderMilliseconds,
|
||||
outputRenderMilliseconds,
|
||||
smoothedOutputRenderMilliseconds,
|
||||
maxOutputRenderMilliseconds,
|
||||
outputFrameAcquireMilliseconds,
|
||||
outputFrameRenderRequestMilliseconds,
|
||||
outputFrameEndAccessMilliseconds,
|
||||
recoveryDecision.completedFrameIndex,
|
||||
recoveryDecision.scheduledFrameIndex,
|
||||
recoveryDecision.scheduledLeadFrames,
|
||||
@@ -397,50 +542,93 @@ void VideoBackend::RecordBackendPlayoutHealth(VideoIOCompletionResult result, co
|
||||
StatusMessage());
|
||||
}
|
||||
|
||||
bool VideoBackend::FillReadyOutputQueue(const VideoIOCompletion& completion)
|
||||
std::size_t VideoBackend::ProduceReadyOutputFrames(const VideoIOCompletion& completion, std::size_t maxFrames)
|
||||
{
|
||||
if (maxFrames == 0)
|
||||
return 0;
|
||||
|
||||
std::lock_guard<std::mutex> productionLock(mOutputProductionMutex);
|
||||
RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
|
||||
bool filledAny = false;
|
||||
while (metrics.depth < mPlayoutPolicy.targetReadyFrames)
|
||||
std::size_t producedFrames = 0;
|
||||
while (producedFrames < maxFrames)
|
||||
{
|
||||
const OutputProductionDecision decision = mOutputProductionController.Decide(BuildOutputProductionPressure(metrics));
|
||||
if (decision.action != OutputProductionAction::Produce)
|
||||
break;
|
||||
|
||||
if (!RenderReadyOutputFrame(mVideoIODevice->State(), completion))
|
||||
return filledAny;
|
||||
filledAny = true;
|
||||
break;
|
||||
++producedFrames;
|
||||
metrics = mReadyOutputQueue.GetMetrics();
|
||||
RecordReadyQueueDepthSample(metrics);
|
||||
}
|
||||
return true;
|
||||
return producedFrames;
|
||||
}
|
||||
|
||||
OutputProductionPressure VideoBackend::BuildOutputProductionPressure(const RenderOutputQueueMetrics& metrics) const
|
||||
{
|
||||
OutputProductionPressure pressure;
|
||||
pressure.readyQueueDepth = metrics.depth;
|
||||
pressure.readyQueueCapacity = metrics.capacity;
|
||||
pressure.readyQueueUnderrunCount = metrics.underrunCount;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
|
||||
pressure.lateStreak = mLastLateStreak;
|
||||
pressure.dropStreak = mLastDropStreak;
|
||||
}
|
||||
return pressure;
|
||||
}
|
||||
|
||||
bool VideoBackend::RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion)
|
||||
{
|
||||
const auto renderStart = std::chrono::steady_clock::now();
|
||||
VideoIOOutputFrame outputFrame;
|
||||
const auto acquireStart = std::chrono::steady_clock::now();
|
||||
if (!BeginOutputFrame(outputFrame))
|
||||
return false;
|
||||
const auto acquireEnd = std::chrono::steady_clock::now();
|
||||
|
||||
bool rendered = true;
|
||||
const auto renderRequestStart = std::chrono::steady_clock::now();
|
||||
if (mBridge)
|
||||
rendered = mBridge->RenderScheduledFrame(state, completion, outputFrame);
|
||||
const auto renderRequestEnd = std::chrono::steady_clock::now();
|
||||
|
||||
const auto endAccessStart = std::chrono::steady_clock::now();
|
||||
EndOutputFrame(outputFrame);
|
||||
const auto endAccessEnd = std::chrono::steady_clock::now();
|
||||
const double acquireMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(acquireEnd - acquireStart).count();
|
||||
const double renderRequestMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(renderRequestEnd - renderRequestStart).count();
|
||||
const double endAccessMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(endAccessEnd - endAccessStart).count();
|
||||
|
||||
if (!rendered)
|
||||
{
|
||||
ApplyLifecycleTransition(VideoBackendLifecycleState::Degraded, "Output frame render request failed; skipping schedule for this frame.");
|
||||
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
|
||||
std::chrono::steady_clock::now() - renderStart).count();
|
||||
RecordOutputRenderDuration(renderMilliseconds);
|
||||
RecordOutputRenderDuration(renderMilliseconds, acquireMilliseconds, renderRequestMilliseconds, endAccessMilliseconds);
|
||||
if (outputFrame.nativeFrame != nullptr)
|
||||
{
|
||||
static_cast<IUnknown*>(outputFrame.nativeFrame)->Release();
|
||||
outputFrame.nativeFrame = nullptr;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
|
||||
std::chrono::steady_clock::now() - renderStart).count();
|
||||
RecordOutputRenderDuration(renderMilliseconds);
|
||||
RecordOutputRenderDuration(renderMilliseconds, acquireMilliseconds, renderRequestMilliseconds, endAccessMilliseconds);
|
||||
|
||||
RenderOutputFrame readyFrame;
|
||||
readyFrame.frame = outputFrame;
|
||||
readyFrame.frameIndex = ++mNextReadyOutputFrameIndex;
|
||||
return mReadyOutputQueue.Push(readyFrame);
|
||||
const bool pushed = mReadyOutputQueue.Push(readyFrame);
|
||||
if (!pushed && outputFrame.nativeFrame != nullptr)
|
||||
{
|
||||
static_cast<IUnknown*>(outputFrame.nativeFrame)->Release();
|
||||
outputFrame.nativeFrame = nullptr;
|
||||
}
|
||||
return pushed;
|
||||
}
|
||||
|
||||
bool VideoBackend::ScheduleReadyOutputFrame()
|
||||
@@ -516,6 +704,7 @@ void VideoBackend::RecordFramePacing(VideoIOCompletionResult completionResult)
|
||||
|
||||
void VideoBackend::RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
|
||||
if (!mHasReadyQueueDepthBaseline)
|
||||
{
|
||||
mHasReadyQueueDepthBaseline = true;
|
||||
@@ -532,14 +721,18 @@ void VideoBackend::RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& m
|
||||
++mReadyQueueZeroDepthCount;
|
||||
}
|
||||
|
||||
void VideoBackend::RecordOutputRenderDuration(double renderMilliseconds)
|
||||
void VideoBackend::RecordOutputRenderDuration(double renderMilliseconds, double acquireMilliseconds, double renderRequestMilliseconds, double endAccessMilliseconds)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
|
||||
mOutputRenderMilliseconds = (std::max)(renderMilliseconds, 0.0);
|
||||
if (mSmoothedOutputRenderMilliseconds <= 0.0)
|
||||
mSmoothedOutputRenderMilliseconds = mOutputRenderMilliseconds;
|
||||
else
|
||||
mSmoothedOutputRenderMilliseconds = mSmoothedOutputRenderMilliseconds * 0.9 + mOutputRenderMilliseconds * 0.1;
|
||||
mMaxOutputRenderMilliseconds = (std::max)(mMaxOutputRenderMilliseconds, mOutputRenderMilliseconds);
|
||||
mOutputFrameAcquireMilliseconds = (std::max)(acquireMilliseconds, 0.0);
|
||||
mOutputFrameRenderRequestMilliseconds = (std::max)(renderRequestMilliseconds, 0.0);
|
||||
mOutputFrameEndAccessMilliseconds = (std::max)(endAccessMilliseconds, 0.0);
|
||||
|
||||
PublishTimingSample("VideoBackend", "outputRender", mOutputRenderMilliseconds, "ms");
|
||||
PublishTimingSample("VideoBackend", "smoothedOutputRender", mSmoothedOutputRenderMilliseconds, "ms");
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "OutputProductionController.h"
|
||||
#include "RenderOutputQueue.h"
|
||||
#include "VideoBackendLifecycle.h"
|
||||
#include "VideoIOTypes.h"
|
||||
@@ -59,6 +60,7 @@ public:
|
||||
bool KeyerInterfaceAvailable() const;
|
||||
bool ExternalKeyingActive() const;
|
||||
const std::string& StatusMessage() const;
|
||||
bool ShouldPrioritizeOutputOverPreview() const;
|
||||
void SetStatusMessage(const std::string& message);
|
||||
void PublishStatus(bool externalKeyingConfigured, const std::string& statusMessage = std::string());
|
||||
void ReportNoInputDeviceSignalStatus();
|
||||
@@ -69,14 +71,20 @@ private:
|
||||
void StartOutputCompletionWorker();
|
||||
void StopOutputCompletionWorker();
|
||||
void OutputCompletionWorkerMain();
|
||||
void StartOutputProducerWorker();
|
||||
void StopOutputProducerWorker();
|
||||
void OutputProducerWorkerMain();
|
||||
void NotifyOutputProducer();
|
||||
std::chrono::milliseconds OutputProducerWakeInterval() const;
|
||||
void ProcessOutputFrameCompletion(const VideoIOCompletion& completion);
|
||||
bool FillReadyOutputQueue(const VideoIOCompletion& completion);
|
||||
std::size_t ProduceReadyOutputFrames(const VideoIOCompletion& completion, std::size_t maxFrames);
|
||||
OutputProductionPressure BuildOutputProductionPressure(const RenderOutputQueueMetrics& metrics) const;
|
||||
bool RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion);
|
||||
bool ScheduleReadyOutputFrame();
|
||||
bool ScheduleBlackUnderrunFrame();
|
||||
void RecordFramePacing(VideoIOCompletionResult completionResult);
|
||||
void RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics);
|
||||
void RecordOutputRenderDuration(double renderMilliseconds);
|
||||
void RecordOutputRenderDuration(double renderMilliseconds, double acquireMilliseconds, double renderRequestMilliseconds, double endAccessMilliseconds);
|
||||
bool ApplyLifecycleTransition(VideoBackendLifecycleState state, const std::string& message);
|
||||
bool ApplyLifecycleFailure(const std::string& message);
|
||||
void PublishBackendStateChanged(const std::string& state, const std::string& message);
|
||||
@@ -92,6 +100,7 @@ private:
|
||||
RuntimeEventDispatcher& mRuntimeEventDispatcher;
|
||||
VideoBackendLifecycle mLifecycle;
|
||||
VideoPlayoutPolicy mPlayoutPolicy;
|
||||
OutputProductionController mOutputProductionController;
|
||||
RenderOutputQueue mReadyOutputQueue;
|
||||
std::unique_ptr<VideoIODevice> mVideoIODevice;
|
||||
std::unique_ptr<OpenGLVideoIOBridge> mBridge;
|
||||
@@ -99,8 +108,17 @@ private:
|
||||
std::condition_variable mOutputCompletionCondition;
|
||||
std::deque<VideoIOCompletion> mPendingOutputCompletions;
|
||||
std::thread mOutputCompletionWorker;
|
||||
std::mutex mOutputProducerMutex;
|
||||
std::condition_variable mOutputProducerCondition;
|
||||
std::thread mOutputProducerWorker;
|
||||
VideoIOCompletion mLastOutputProductionCompletion;
|
||||
std::chrono::steady_clock::time_point mLastOutputProductionTime;
|
||||
std::mutex mOutputProductionMutex;
|
||||
mutable std::mutex mOutputMetricsMutex;
|
||||
bool mOutputCompletionWorkerRunning = false;
|
||||
bool mOutputCompletionWorkerStopping = false;
|
||||
bool mOutputProducerWorkerRunning = false;
|
||||
bool mOutputProducerWorkerStopping = false;
|
||||
uint64_t mNextReadyOutputFrameIndex = 0;
|
||||
uint64_t mInputFrameIndex = 0;
|
||||
uint64_t mOutputFrameScheduleIndex = 0;
|
||||
@@ -121,6 +139,11 @@ private:
|
||||
double mOutputRenderMilliseconds = 0.0;
|
||||
double mSmoothedOutputRenderMilliseconds = 0.0;
|
||||
double mMaxOutputRenderMilliseconds = 0.0;
|
||||
double mOutputFrameAcquireMilliseconds = 0.0;
|
||||
double mOutputFrameRenderRequestMilliseconds = 0.0;
|
||||
double mOutputFrameEndAccessMilliseconds = 0.0;
|
||||
uint64_t mLastLateStreak = 0;
|
||||
uint64_t mLastDropStreak = 0;
|
||||
uint64_t mLateFrameCount = 0;
|
||||
uint64_t mDroppedFrameCount = 0;
|
||||
uint64_t mFlushedFrameCount = 0;
|
||||
|
||||
@@ -30,5 +30,8 @@ inline VideoPlayoutPolicy NormalizeVideoPlayoutPolicy(VideoPlayoutPolicy policy)
|
||||
policy.targetReadyFrames = 1;
|
||||
if (policy.maxReadyFrames < policy.targetReadyFrames)
|
||||
policy.maxReadyFrames = policy.targetReadyFrames;
|
||||
const unsigned minimumOutputFramePoolSize = policy.targetPrerollFrames + policy.maxReadyFrames + policy.minimumSpareDeviceFrames;
|
||||
if (policy.outputFramePoolSize < minimumOutputFramePoolSize)
|
||||
policy.outputFramePoolSize = minimumOutputFramePoolSize;
|
||||
return policy;
|
||||
}
|
||||
|
||||
@@ -423,7 +423,6 @@ bool DeckLinkSession::AcquireNextOutputVideoFrame(CComPtr<IDeckLinkMutableVideoF
|
||||
return false;
|
||||
|
||||
outputVideoFrame = outputVideoFrameQueue.front();
|
||||
outputVideoFrameQueue.push_back(outputVideoFrame);
|
||||
outputVideoFrameQueue.pop_front();
|
||||
return outputVideoFrame != nullptr;
|
||||
}
|
||||
@@ -448,6 +447,7 @@ bool DeckLinkSession::PopulateOutputFrame(IDeckLinkMutableVideoFrame* outputVide
|
||||
frame.width = mState.outputFrameSize.width;
|
||||
frame.height = mState.outputFrameSize.height;
|
||||
frame.pixelFormat = mState.outputPixelFormat;
|
||||
outputVideoFrame->AddRef();
|
||||
frame.nativeFrame = outputVideoFrame;
|
||||
frame.nativeBuffer = outputVideoFrameBuffer.Detach();
|
||||
return true;
|
||||
@@ -506,7 +506,10 @@ VideoPlayoutRecoveryDecision DeckLinkSession::AccountForCompletionResult(VideoIO
|
||||
bool DeckLinkSession::ScheduleOutputFrame(const VideoIOOutputFrame& frame)
|
||||
{
|
||||
IDeckLinkMutableVideoFrame* outputVideoFrame = static_cast<IDeckLinkMutableVideoFrame*>(frame.nativeFrame);
|
||||
return ScheduleFrame(outputVideoFrame);
|
||||
const bool scheduled = ScheduleFrame(outputVideoFrame);
|
||||
if (outputVideoFrame != nullptr)
|
||||
outputVideoFrame->Release();
|
||||
return scheduled;
|
||||
}
|
||||
|
||||
bool DeckLinkSession::Start()
|
||||
@@ -616,8 +619,18 @@ void DeckLinkSession::HandleVideoInputFrame(IDeckLinkVideoInputFrame* inputFrame
|
||||
inputFrameBuffer->EndAccess(bmdBufferAccessRead);
|
||||
}
|
||||
|
||||
void DeckLinkSession::HandlePlayoutFrameCompleted(IDeckLinkVideoFrame*, BMDOutputFrameCompletionResult completionResult)
|
||||
void DeckLinkSession::HandlePlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, BMDOutputFrameCompletionResult completionResult)
|
||||
{
|
||||
if (completedFrame != nullptr)
|
||||
{
|
||||
CComPtr<IDeckLinkMutableVideoFrame> reusableFrame;
|
||||
if (completedFrame->QueryInterface(IID_IDeckLinkMutableVideoFrame, reinterpret_cast<void**>(&reusableFrame)) == S_OK &&
|
||||
reusableFrame != nullptr)
|
||||
{
|
||||
outputVideoFrameQueue.push_back(reusableFrame);
|
||||
}
|
||||
}
|
||||
|
||||
if (!mOutputFrameCallback)
|
||||
return;
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ Phase 7 made backend lifecycle, playout policy, ready-frame queueing, late/drop
|
||||
## Status
|
||||
|
||||
- Phase 7.5 design package: proposed.
|
||||
- Phase 7.5 implementation: Step 2 in progress.
|
||||
- Phase 7.5 implementation: Step 5 in progress.
|
||||
- Current alignment: Phase 7 is complete. `RenderOutputQueue`, `VideoPlayoutPolicy`, `VideoPlayoutScheduler`, `VideoBackendLifecycle`, and backend playout telemetry exist. The backend worker fills the ready queue on completion demand, but render production is not yet proactively driven by queue pressure or video cadence.
|
||||
|
||||
Current footholds:
|
||||
@@ -19,6 +19,9 @@ Current footholds:
|
||||
- `HealthTelemetry::BackendPlayoutSnapshot` exposes queue depth, underruns, late/drop streaks, and recovery decisions.
|
||||
- Step 1 adds baseline timing fields for ready-queue min/max/zero-depth samples and output render duration.
|
||||
- Step 2 adds a pure `OutputProductionController` for queue-pressure production decisions.
|
||||
- Step 3 adds a proactive output producer worker that keeps `RenderOutputQueue` warm after playback starts.
|
||||
- Step 4 skips non-forced preview presentation while output ready-queue depth is below target.
|
||||
- Step 5 makes async readback misses prefer cached output over synchronous readback after bootstrap.
|
||||
|
||||
## Timing Review Findings
|
||||
|
||||
@@ -199,15 +202,23 @@ Move from demand-filled output production to queue-pressure production.
|
||||
|
||||
Initial target:
|
||||
|
||||
- producer wakes when queue depth is below target
|
||||
- producer requests render-thread output production until target depth is reached
|
||||
- producer stops when backend stops or render thread shuts down
|
||||
- completion worker mostly schedules from already-ready frames
|
||||
- [x] producer wakes when queue depth is below target
|
||||
- [x] producer requests render-thread output production until target depth is reached
|
||||
- [x] producer stops when backend stops or render thread shuts down
|
||||
- [x] completion worker mostly schedules from already-ready frames
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- normal playback does not depend on completion processing to fill the queue from empty
|
||||
- callback/completion pressure and render production pressure are separate
|
||||
- [x] normal playback does not depend on completion processing to fill the queue from empty
|
||||
- [x] callback/completion pressure and render production pressure are separate
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- `VideoBackend` starts the completion worker before device start, then starts the output producer only after DeckLink start succeeds. This avoids fighting DeckLink preroll for the same output frame pool.
|
||||
- `OutputProducerWorkerMain()` periodically wakes and uses `OutputProductionController` to decide whether to produce, wait, or throttle.
|
||||
- Completion handling records pacing/recovery, updates producer pressure, schedules a ready frame, and wakes the producer to refill headroom.
|
||||
- Completion handling keeps a one-frame synchronous fallback when the ready queue is unexpectedly empty, then falls back to black underrun behavior if that also fails.
|
||||
- Producer shutdown is explicit and joined before video output teardown.
|
||||
|
||||
### Step 4. Prioritize Playout Over Preview
|
||||
|
||||
@@ -215,15 +226,21 @@ Make preview explicitly subordinate to output playout deadlines.
|
||||
|
||||
Initial target:
|
||||
|
||||
- skip or delay preview when ready queue depth is below target
|
||||
- [x] skip or delay preview when ready queue depth is below target
|
||||
- count skipped previews
|
||||
- record preview present cost separately from output render cost
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- preview cannot drain output headroom invisibly
|
||||
- [x] preview cannot drain output headroom invisibly
|
||||
- runtime telemetry shows preview skips and preview present cost
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- `OpenGLComposite::paintGL(false)` now skips preview presentation when `VideoBackend` reports that the ready queue is below the target depth.
|
||||
- Forced preview paints are still allowed so resize/manual paint behavior remains intact.
|
||||
- Preview skip counters and present-cost telemetry remain follow-up work for this step.
|
||||
|
||||
### Step 5. Make Readback Miss Policy Deadline-Aware
|
||||
|
||||
Avoid turning a late async readback fence into synchronous deadline pressure by default.
|
||||
@@ -232,13 +249,20 @@ Initial target:
|
||||
|
||||
- count async readback misses
|
||||
- count synchronous fallback uses
|
||||
- allow policy to prefer stale/black output over synchronous fallback when queue pressure is high
|
||||
- keep current fallback available while behavior is measured
|
||||
- [x] allow policy to prefer stale/black output over synchronous fallback when queue pressure is high
|
||||
- [x] keep current fallback available while behavior is measured
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- readback fallback is an explicit policy decision
|
||||
- late GPU fences do not automatically block the most timing-sensitive path
|
||||
- [x] readback fallback is an explicit policy decision
|
||||
- [x] late GPU fences do not automatically block the most timing-sensitive path
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- `OpenGLRenderPipeline::ReadOutputFrame()` now uses synchronous readback only to bootstrap the first cached output frame.
|
||||
- After cached output exists, an async readback miss copies the cached output frame into the DeckLink output frame instead of blocking on synchronous `glReadPixels`.
|
||||
- Async readback queueing now skips when the next PBO slot is still in flight rather than deleting an in-flight fence and overwriting it.
|
||||
- Miss/fallback counters remain follow-up telemetry work for this step.
|
||||
|
||||
### Step 6. Tune Headroom Policy
|
||||
|
||||
|
||||
165
docs/PHASE_7_5_READBACK_EXPERIMENT_LOG.md
Normal file
165
docs/PHASE_7_5_READBACK_EXPERIMENT_LOG.md
Normal file
@@ -0,0 +1,165 @@
|
||||
# Phase 7.5 Readback Experiment Log
|
||||
|
||||
This log tracks short readback experiments during the proactive playout timing work.
|
||||
|
||||
## How To Run
|
||||
|
||||
The default debugger launch keeps the current production path:
|
||||
|
||||
- `Debug LoopThroughWithOpenGLCompositing`
|
||||
- `VST_OUTPUT_READBACK_MODE` unset
|
||||
- mode: `async_pbo`
|
||||
|
||||
Comparison modes are still available:
|
||||
|
||||
- `VST_OUTPUT_READBACK_MODE=async_pbo`
|
||||
- uses the older PBO/fence readback path
|
||||
|
||||
The experiment launches are:
|
||||
|
||||
- `Debug LoopThroughWithOpenGLCompositing - sync readback experiment`
|
||||
- `VST_OUTPUT_READBACK_MODE=sync`
|
||||
- uses direct synchronous `glReadPixels()` every output frame
|
||||
|
||||
- `Debug LoopThroughWithOpenGLCompositing - cached output experiment`
|
||||
- `VST_OUTPUT_READBACK_MODE=cached_only`
|
||||
- uses one bootstrap synchronous readback, then copies the cached output frame without ongoing GPU readback
|
||||
|
||||
The cached-output experiment is not visually correct for live motion. It exists to test whether removing ongoing GPU readback lets the producer fill the ready queue again.
|
||||
|
||||
## Experiment 3: fast_transfer
|
||||
|
||||
Status: removed from active code after hardware sample
|
||||
|
||||
Date: 2026-05-11
|
||||
|
||||
Change:
|
||||
|
||||
- DeckLink output frames are now created with `CreateVideoFrameWithBuffer()`.
|
||||
- Output frame buffers are owned by `PinnedMemoryAllocator`.
|
||||
- `VideoIOOutputFrame` carries a texture-transfer callback.
|
||||
- The test branch changed the default render readback path to try `VideoFrameTransfer::GPUtoCPU` against the output texture for BGRA output.
|
||||
- If fast transfer is unavailable or fails, the code falls back to cached output if present, then synchronous readback as a safety fallback.
|
||||
|
||||
Question:
|
||||
|
||||
Can SDK-style pinned/DVP transfer recover real rendered output timing without the visually-invalid cached-only shortcut?
|
||||
|
||||
Result:
|
||||
|
||||
- The test machine reported `GL_VENDOR=NVIDIA Corporation` and `GL_RENDERER=NVIDIA GeForce RTX 4060 Ti/PCIe/SSE2`.
|
||||
- The DeckLink SDK OpenGL fast-transfer sample gates NVIDIA DVP on `GL_RENDERER` containing `Quadro`.
|
||||
- `GL_AMD_pinned_memory` was also unavailable.
|
||||
- The fast-transfer path was removed from active code to avoid carrying unsupported DVP dependencies while we investigate CPU-frame buffering and render-ahead.
|
||||
|
||||
## Baseline: async_pbo
|
||||
|
||||
Date: 2026-05-11
|
||||
|
||||
Observed while the app was running after adding the async queue split counters.
|
||||
|
||||
Summary:
|
||||
|
||||
- ready queue was pinned at 0 or briefly 1
|
||||
- underrun, zero-depth, late, and dropped counts increased continuously
|
||||
- `renderRequestMs` usually sat around 16-25 ms, with occasional larger spikes
|
||||
- `asyncQueueMs` was mostly explained by `asyncQueueReadPixelsMs`
|
||||
- PBO allocation/orphaning was effectively 0 ms
|
||||
|
||||
Representative samples:
|
||||
|
||||
| readyDepth | renderRequestMs | queueWaitMs | drawMs | mapMs | copyMs | asyncQueueMs | asyncQueueBufferMs | asyncQueueReadPixelsMs |
|
||||
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
||||
| 0 | 24.915 | 3.018 | 0.510 | 0.923 | 0.768 | 9.018 | 0.000 | 9.001 |
|
||||
| 0 | 16.226 | 3.066 | 0.518 | 1.202 | 0.812 | 8.611 | 0.000 | 8.598 |
|
||||
| 0 | 12.134 | 3.796 | 3.579 | 1.378 | 0.690 | 10.323 | 0.000 | 10.311 |
|
||||
| 0 | 17.496 | 2.817 | 0.523 | 1.267 | 1.160 | 9.416 | 0.000 | 9.403 |
|
||||
|
||||
Initial read:
|
||||
|
||||
The main repeated cost is issuing `glReadPixels(..., nullptr)` into the PBO. `glBufferData`, setup, fence creation, fence wait, map, and CPU copy are not large enough to explain the underruns.
|
||||
|
||||
## Experiment 1: sync
|
||||
|
||||
Status: sampled
|
||||
|
||||
Question:
|
||||
|
||||
Does the direct synchronous readback path perform better or worse than the current PBO path on this machine and DeckLink format?
|
||||
|
||||
Expected interpretation:
|
||||
|
||||
- If `syncReadMs` is lower than `asyncQueueReadPixelsMs` and the ready queue improves, the current PBO path is the wrong strategy for this driver/format.
|
||||
- If `syncReadMs` is also high and the ready queue remains empty, any GPU-to-CPU readback in this path is too expensive for the current producer cadence.
|
||||
|
||||
Results:
|
||||
|
||||
Date: 2026-05-11
|
||||
|
||||
Summary:
|
||||
|
||||
- ready queue remained pinned at 0
|
||||
- underrun, zero-depth, late, and dropped counts continued increasing
|
||||
- `asyncQueueMs` and async readback counters were 0, confirming the experiment mode was active
|
||||
- direct `syncReadMs` was generally worse than the baseline PBO `asyncQueueReadPixelsMs`
|
||||
|
||||
Representative samples:
|
||||
|
||||
| readyDepth | renderRequestMs | queueWaitMs | drawMs | syncReadMs | asyncQueueMs | syncFallbackCount |
|
||||
| --- | ---: | ---: | ---: | ---: | ---: | ---: |
|
||||
| 0 | 32.467 | 5.764 | 1.389 | 23.122 | 0.000 | 680 |
|
||||
| 0 | 29.722 | 2.603 | 0.512 | 25.538 | 0.000 | 697 |
|
||||
| 0 | 37.844 | 7.716 | 0.518 | 23.608 | 0.000 | 706 |
|
||||
| 0 | 22.304 | 3.089 | 1.843 | 15.278 | 0.000 | 723 |
|
||||
| 0 | 27.196 | 4.015 | 0.500 | 21.933 | 0.000 | 736 |
|
||||
|
||||
Read:
|
||||
|
||||
Direct synchronous readback does not recover the queue and is slower than the async PBO path on the sampled run. The bottleneck appears to be GPU-to-CPU readback itself, not PBO orphaning or fence handling.
|
||||
|
||||
## Experiment 2: cached_only
|
||||
|
||||
Status: sampled
|
||||
|
||||
Question:
|
||||
|
||||
If ongoing GPU readback is removed after bootstrap, can the producer keep the ready queue above 0?
|
||||
|
||||
Expected interpretation:
|
||||
|
||||
- If ready depth rises and underruns slow or stop, readback is the primary bottleneck.
|
||||
- If ready depth still stays near 0, the bottleneck is elsewhere in scheduling, frame acquisition, queueing, or DeckLink handoff.
|
||||
|
||||
Results:
|
||||
|
||||
Date: 2026-05-11
|
||||
|
||||
User-visible result:
|
||||
|
||||
- DeckLink reported a healthy 5-frame buffer.
|
||||
|
||||
Telemetry summary:
|
||||
|
||||
- `renderRequestMs` dropped to roughly 1-3 ms.
|
||||
- `cachedCopyMs` was usually around 0.8-1.0 ms, with one sampled low value around 0.37 ms.
|
||||
- `asyncQueueMs`, `asyncQueueReadPixelsMs`, `syncReadMs`, fence wait, map, and async copy were 0 after bootstrap.
|
||||
- `syncFallbackCount` stayed at 1, confirming one bootstrap readback.
|
||||
- `cachedFallbackCount` increased continuously, confirming ongoing frames were served from cached CPU memory.
|
||||
- late and dropped counts were 0 during the sampled run.
|
||||
- internal ready queue depth still reported mostly 0-1 even while DeckLink showed a healthy hardware/device buffer.
|
||||
|
||||
Representative samples:
|
||||
|
||||
| readyDepth | renderRequestMs | queueWaitMs | drawMs | cachedCopyMs | asyncQueueMs | syncReadMs | late | dropped |
|
||||
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
||||
| 0 | 1.446 | 0.018 | 0.518 | 0.864 | 0.000 | 0.000 | 0 | 0 |
|
||||
| 0 | 2.586 | 1.089 | 0.514 | 0.829 | 0.000 | 0.000 | 0 | 0 |
|
||||
| 0 | 1.481 | 2.378 | 0.502 | 0.911 | 0.000 | 0.000 | 0 | 0 |
|
||||
| 0 | 0.892 | 0.013 | 0.468 | 0.371 | 0.000 | 0.000 | 0 | 0 |
|
||||
| 1 | 1.398 | 0.019 | 0.483 | 0.819 | 0.000 | 0.000 | 0 | 0 |
|
||||
|
||||
Read:
|
||||
|
||||
Removing ongoing GPU readback recovers output timing immediately. The direct cause of the Phase 7.5 playback collapse is the per-frame GPU-to-CPU readback cost, not DeckLink frame acquisition, output frame end-access, PBO allocation, fence waiting, or CPU copy.
|
||||
|
||||
The internal ready queue depth still being low while DeckLink reports a healthy device buffer suggests the ready queue is acting as a short staging queue rather than the full device playout buffer. For the next fix, prioritize avoiding a blocking readback on every output frame instead of only increasing internal ready queue depth.
|
||||
@@ -363,6 +363,10 @@ components:
|
||||
$ref: "#/components/schemas/VideoIOStatus"
|
||||
performance:
|
||||
$ref: "#/components/schemas/PerformanceStatus"
|
||||
backendPlayout:
|
||||
$ref: "#/components/schemas/BackendPlayoutStatus"
|
||||
runtimeEvents:
|
||||
$ref: "#/components/schemas/RuntimeEventStatus"
|
||||
shaders:
|
||||
type: array
|
||||
items:
|
||||
@@ -382,10 +386,16 @@ components:
|
||||
type: number
|
||||
oscPort:
|
||||
type: number
|
||||
oscBindAddress:
|
||||
type: string
|
||||
oscSmoothing:
|
||||
type: number
|
||||
autoReload:
|
||||
type: boolean
|
||||
maxTemporalHistoryFrames:
|
||||
type: number
|
||||
previewFps:
|
||||
type: number
|
||||
enableExternalKeying:
|
||||
type: boolean
|
||||
inputVideoFormat:
|
||||
@@ -478,6 +488,175 @@ components:
|
||||
type: number
|
||||
flushedFrameCount:
|
||||
type: number
|
||||
BackendPlayoutStatus:
|
||||
type: object
|
||||
properties:
|
||||
lifecycleState:
|
||||
type: string
|
||||
example: running
|
||||
degraded:
|
||||
type: boolean
|
||||
statusMessage:
|
||||
type: string
|
||||
lateFrameCount:
|
||||
type: number
|
||||
droppedFrameCount:
|
||||
type: number
|
||||
flushedFrameCount:
|
||||
type: number
|
||||
readyQueue:
|
||||
$ref: "#/components/schemas/BackendReadyQueueStatus"
|
||||
outputRender:
|
||||
$ref: "#/components/schemas/BackendOutputRenderStatus"
|
||||
recovery:
|
||||
$ref: "#/components/schemas/BackendPlayoutRecoveryStatus"
|
||||
BackendReadyQueueStatus:
|
||||
type: object
|
||||
properties:
|
||||
depth:
|
||||
type: number
|
||||
description: Current number of ready output frames.
|
||||
capacity:
|
||||
type: number
|
||||
description: Maximum ready output frames currently allowed.
|
||||
minDepth:
|
||||
type: number
|
||||
description: Minimum observed ready queue depth since backend worker start.
|
||||
maxDepth:
|
||||
type: number
|
||||
description: Maximum observed ready queue depth since backend worker start.
|
||||
zeroDepthCount:
|
||||
type: number
|
||||
description: Number of observed samples where the ready queue was empty.
|
||||
pushedCount:
|
||||
type: number
|
||||
poppedCount:
|
||||
type: number
|
||||
droppedCount:
|
||||
type: number
|
||||
underrunCount:
|
||||
type: number
|
||||
BackendOutputRenderStatus:
|
||||
type: object
|
||||
properties:
|
||||
renderMs:
|
||||
type: number
|
||||
description: Most recent output render duration in milliseconds.
|
||||
smoothedRenderMs:
|
||||
type: number
|
||||
description: Smoothed output render duration in milliseconds.
|
||||
maxRenderMs:
|
||||
type: number
|
||||
description: Maximum observed output render duration in milliseconds.
|
||||
acquireFrameMs:
|
||||
type: number
|
||||
description: Time spent acquiring a writable backend output frame in milliseconds.
|
||||
renderRequestMs:
|
||||
type: number
|
||||
description: Time spent executing the render-thread output frame request in milliseconds.
|
||||
endAccessMs:
|
||||
type: number
|
||||
description: Time spent ending write access to the backend output frame in milliseconds.
|
||||
queueWaitMs:
|
||||
type: number
|
||||
description: Time the output render request spent waiting for the render thread in milliseconds.
|
||||
drawMs:
|
||||
type: number
|
||||
description: Time spent drawing, blitting, packing, and flushing the output frame in milliseconds.
|
||||
fenceWaitMs:
|
||||
type: number
|
||||
description: Time spent waiting for the async readback fence in milliseconds.
|
||||
mapMs:
|
||||
type: number
|
||||
description: Time spent mapping the async readback pixel buffer in milliseconds.
|
||||
readbackCopyMs:
|
||||
type: number
|
||||
description: Time spent copying async readback bytes into the backend output frame in milliseconds.
|
||||
cachedCopyMs:
|
||||
type: number
|
||||
description: Time spent copying the cached output frame when async readback is not ready in milliseconds.
|
||||
asyncQueueMs:
|
||||
type: number
|
||||
description: Time spent queueing the next async readback in milliseconds.
|
||||
asyncQueueBufferMs:
|
||||
type: number
|
||||
description: Time spent orphaning or allocating the async readback pixel buffer in milliseconds.
|
||||
asyncQueueSetupMs:
|
||||
type: number
|
||||
description: Time spent applying readback pixel-store, framebuffer, and pixel-pack-buffer state in milliseconds.
|
||||
asyncQueueReadPixelsMs:
|
||||
type: number
|
||||
description: Time spent issuing glReadPixels for the async readback in milliseconds.
|
||||
asyncQueueFenceMs:
|
||||
type: number
|
||||
description: Time spent creating the async readback fence in milliseconds.
|
||||
syncReadMs:
|
||||
type: number
|
||||
description: Time spent in bootstrap synchronous readback in milliseconds.
|
||||
asyncReadbackMissCount:
|
||||
type: number
|
||||
description: Count of output render requests where async readback was not ready.
|
||||
cachedFallbackCount:
|
||||
type: number
|
||||
description: Count of output render requests served from the cached output frame.
|
||||
syncFallbackCount:
|
||||
type: number
|
||||
description: Count of output render requests that used bootstrap synchronous readback.
|
||||
BackendPlayoutRecoveryStatus:
|
||||
type: object
|
||||
properties:
|
||||
completionResult:
|
||||
type: string
|
||||
enum: [Completed, DisplayedLate, Dropped, Flushed, Unknown]
|
||||
completedFrameIndex:
|
||||
type: number
|
||||
scheduledFrameIndex:
|
||||
type: number
|
||||
scheduledLeadFrames:
|
||||
type: number
|
||||
measuredLagFrames:
|
||||
type: number
|
||||
catchUpFrames:
|
||||
type: number
|
||||
lateStreak:
|
||||
type: number
|
||||
dropStreak:
|
||||
type: number
|
||||
RuntimeEventStatus:
|
||||
type: object
|
||||
properties:
|
||||
queue:
|
||||
$ref: "#/components/schemas/RuntimeEventQueueStatus"
|
||||
dispatch:
|
||||
$ref: "#/components/schemas/RuntimeEventDispatchStatus"
|
||||
RuntimeEventQueueStatus:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
depth:
|
||||
type: number
|
||||
capacity:
|
||||
type: number
|
||||
droppedCount:
|
||||
type: number
|
||||
oldestEventAgeMs:
|
||||
type: number
|
||||
RuntimeEventDispatchStatus:
|
||||
type: object
|
||||
properties:
|
||||
dispatchCallCount:
|
||||
type: number
|
||||
dispatchedEventCount:
|
||||
type: number
|
||||
handlerInvocationCount:
|
||||
type: number
|
||||
handlerFailureCount:
|
||||
type: number
|
||||
lastDispatchDurationMs:
|
||||
type: number
|
||||
maxDispatchDurationMs:
|
||||
type: number
|
||||
ShaderSummary:
|
||||
type: object
|
||||
properties:
|
||||
@@ -497,6 +676,8 @@ components:
|
||||
description: Error text for unavailable shader packages.
|
||||
temporal:
|
||||
$ref: "#/components/schemas/TemporalState"
|
||||
feedback:
|
||||
$ref: "#/components/schemas/FeedbackState"
|
||||
TemporalState:
|
||||
type: object
|
||||
properties:
|
||||
@@ -509,6 +690,13 @@ components:
|
||||
type: number
|
||||
effectiveHistoryLength:
|
||||
type: number
|
||||
FeedbackState:
|
||||
type: object
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
writePass:
|
||||
type: string
|
||||
LayerState:
|
||||
type: object
|
||||
properties:
|
||||
|
||||
@@ -95,6 +95,9 @@ void TestBackendPlayoutHealth()
|
||||
8.5,
|
||||
7.25,
|
||||
12.0,
|
||||
1.0,
|
||||
6.5,
|
||||
0.5,
|
||||
8,
|
||||
11,
|
||||
3,
|
||||
@@ -121,6 +124,9 @@ void TestBackendPlayoutHealth()
|
||||
Expect(playout.outputRenderMilliseconds == 8.5, "backend playout health stores output render duration");
|
||||
Expect(playout.smoothedOutputRenderMilliseconds == 7.25, "backend playout health stores smoothed output render duration");
|
||||
Expect(playout.maxOutputRenderMilliseconds == 12.0, "backend playout health stores max output render duration");
|
||||
Expect(playout.outputFrameAcquireMilliseconds == 1.0, "backend playout health stores output frame acquire duration");
|
||||
Expect(playout.outputFrameRenderRequestMilliseconds == 6.5, "backend playout health stores output render request duration");
|
||||
Expect(playout.outputFrameEndAccessMilliseconds == 0.5, "backend playout health stores output frame end access duration");
|
||||
Expect(playout.completedFrameIndex == 8, "backend playout health stores completed index");
|
||||
Expect(playout.scheduledFrameIndex == 11, "backend playout health stores scheduled index");
|
||||
Expect(playout.measuredLagFrames == 2, "backend playout health stores measured lag");
|
||||
@@ -148,6 +154,9 @@ void TestBackendPlayoutHealth()
|
||||
-5.0,
|
||||
-4.0,
|
||||
-3.0,
|
||||
-2.0,
|
||||
-1.0,
|
||||
-0.5,
|
||||
9,
|
||||
12,
|
||||
3,
|
||||
@@ -165,6 +174,32 @@ void TestBackendPlayoutHealth()
|
||||
Expect(snapshot.backendPlayout.lifecycleState == "Running", "full health snapshot includes backend playout state");
|
||||
Expect(!snapshot.backendPlayout.degraded, "full health snapshot includes backend degraded state");
|
||||
}
|
||||
|
||||
void TestOutputRenderPipelineTiming()
|
||||
{
|
||||
HealthTelemetry telemetry;
|
||||
telemetry.RecordOutputRenderQueueWait(2.5);
|
||||
telemetry.RecordOutputRenderPipelineTiming(1.0, 0.5, 0.25, 0.75, 0.125, 0.375, 0.1, 0.2, 0.3, 0.4, 3.5, true, true, false);
|
||||
Expect(telemetry.TryRecordOutputRenderPipelineTiming(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, false, false, true),
|
||||
"try output render timing succeeds when uncontended");
|
||||
|
||||
const HealthTelemetry::BackendPlayoutSnapshot playout = telemetry.GetBackendPlayoutSnapshot();
|
||||
Expect(playout.outputRenderQueueWaitMilliseconds == 2.5, "output render timing stores queue wait");
|
||||
Expect(playout.outputRenderDrawMilliseconds == 0.0, "output render timing clamps draw duration");
|
||||
Expect(playout.outputReadbackFenceWaitMilliseconds == 0.0, "output render timing clamps fence wait duration");
|
||||
Expect(playout.outputReadbackMapMilliseconds == 0.0, "output render timing clamps map duration");
|
||||
Expect(playout.outputReadbackCopyMilliseconds == 0.0, "output render timing clamps readback copy duration");
|
||||
Expect(playout.outputCachedCopyMilliseconds == 0.0, "output render timing clamps cached copy duration");
|
||||
Expect(playout.outputAsyncQueueMilliseconds == 0.0, "output render timing clamps async queue duration");
|
||||
Expect(playout.outputAsyncQueueBufferMilliseconds == 0.0, "output render timing clamps async queue buffer duration");
|
||||
Expect(playout.outputAsyncQueueSetupMilliseconds == 0.0, "output render timing clamps async queue setup duration");
|
||||
Expect(playout.outputAsyncQueueReadPixelsMilliseconds == 0.0, "output render timing clamps async queue read pixels duration");
|
||||
Expect(playout.outputAsyncQueueFenceMilliseconds == 0.0, "output render timing clamps async queue fence duration");
|
||||
Expect(playout.outputSyncReadMilliseconds == 0.0, "output render timing clamps sync read duration");
|
||||
Expect(playout.outputAsyncReadbackMissCount == 1, "output render timing counts async readback misses");
|
||||
Expect(playout.outputCachedFallbackCount == 1, "output render timing counts cached fallbacks");
|
||||
Expect(playout.outputSyncFallbackCount == 1, "output render timing counts sync fallbacks");
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
@@ -174,6 +209,7 @@ int main()
|
||||
TestRuntimeEventTryRecord();
|
||||
TestPersistenceWriteHealth();
|
||||
TestBackendPlayoutHealth();
|
||||
TestOutputRenderPipelineTiming();
|
||||
|
||||
if (gFailures != 0)
|
||||
{
|
||||
|
||||
@@ -93,9 +93,10 @@ void TestPolicyNormalization()
|
||||
policy.maxReadyFrames = 2;
|
||||
|
||||
VideoPlayoutPolicy normalized = NormalizeVideoPlayoutPolicy(policy);
|
||||
Expect(normalized.outputFramePoolSize == 1, "policy normalization keeps at least one output frame");
|
||||
Expect(normalized.targetPrerollFrames == 1, "policy normalization keeps at least one preroll frame");
|
||||
Expect(normalized.maxReadyFrames == normalized.targetReadyFrames, "policy normalization keeps max ready frames above target");
|
||||
Expect(normalized.outputFramePoolSize >= normalized.targetPrerollFrames + normalized.maxReadyFrames + normalized.minimumSpareDeviceFrames,
|
||||
"policy normalization keeps enough output frames for preroll and ready queue ownership");
|
||||
}
|
||||
|
||||
void TestFrameBudgets()
|
||||
|
||||
Reference in New Issue
Block a user