Performance chasing
All checks were successful
CI / React UI Build (push) Successful in 10s
CI / Native Windows Build And Tests (push) Successful in 2m51s
CI / Windows Release Package (push) Successful in 2m55s

This commit is contained in:
Aiden
2026-05-11 23:10:45 +10:00
parent c5cead6003
commit a434a88108
18 changed files with 1115 additions and 82 deletions

View File

@@ -18,6 +18,7 @@ RenderEngine::RenderEngine(
mRenderPass(mRenderer),
mRenderPipeline(mRenderer, runtimeSnapshotProvider, healthTelemetry, std::move(renderEffect), std::move(screenshotReady), std::move(previewPaint)),
mShaderPrograms(mRenderer, runtimeSnapshotProvider),
mHealthTelemetry(healthTelemetry),
mHdc(hdc),
mHglrc(hglrc),
mFrameStateResolver(runtimeSnapshotProvider)
@@ -546,7 +547,11 @@ bool RenderEngine::RequestOutputFrame(const RenderPipelineFrameContext& context,
{
if (mRenderThreadRunning)
{
return TryInvokeOnRenderThread("output-render", [this, &context, &outputFrame]() {
const auto queuedAt = std::chrono::steady_clock::now();
return TryInvokeOnRenderThread("output-render", [this, &context, &outputFrame, queuedAt]() {
const auto startedAt = std::chrono::steady_clock::now();
const double queueWaitMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(startedAt - queuedAt).count();
mHealthTelemetry.TryRecordOutputRenderQueueWait(queueWaitMilliseconds);
mRenderCommandQueue.RequestOutputFrame({ context.videoState, context.completion });
RenderOutputFrameRequest request;
return mRenderCommandQueue.TryTakeOutputFrame(request) &&

View File

@@ -209,6 +209,7 @@ private:
OpenGLRenderPass mRenderPass;
OpenGLRenderPipeline mRenderPipeline;
OpenGLShaderPrograms mShaderPrograms;
HealthTelemetry& mHealthTelemetry;
HDC mHdc;
HGLRC mHglrc;

View File

@@ -164,6 +164,9 @@ error:
void OpenGLComposite::paintGL(bool force)
{
if (mRuntimeUpdateController)
mRuntimeUpdateController->ProcessRuntimeWork();
if (!force)
{
if (IsIconic(hGLWnd))
@@ -171,6 +174,12 @@ void OpenGLComposite::paintGL(bool force)
}
const unsigned previewFps = mRuntimeStore ? mRuntimeStore->GetConfiguredPreviewFps() : 30u;
if (!force && mVideoBackend && mVideoBackend->ShouldPrioritizeOutputOverPreview())
{
ValidateRect(hGLWnd, NULL);
return;
}
if (!mRenderEngine->TryPresentPreview(force, previewFps, mVideoBackend->OutputFrameWidth(), mVideoBackend->OutputFrameHeight()))
{
ValidateRect(hGLWnd, NULL);
@@ -261,6 +270,9 @@ bool OpenGLComposite::Start()
if (!mRenderEngine->StartRenderThread())
return false;
if (mRuntimeUpdateController)
mRuntimeUpdateController->ProcessRuntimeWork();
if (mVideoBackend->Start())
return true;
@@ -351,9 +363,6 @@ bool OpenGLComposite::RequestScreenshot(std::string& error)
void OpenGLComposite::renderEffect()
{
if (mRuntimeUpdateController)
mRuntimeUpdateController->ProcessRuntimeWork();
const RenderFrameInput frameInput = BuildRenderFrameInput();
RenderFrame(frameInput);
}

View File

@@ -8,7 +8,9 @@
#include <cstring>
#include <chrono>
#include <cstdlib>
#include <gl/gl.h>
#include <string>
OpenGLRenderPipeline::OpenGLRenderPipeline(
OpenGLRenderer& renderer,
@@ -22,7 +24,8 @@ OpenGLRenderPipeline::OpenGLRenderPipeline(
mHealthTelemetry(healthTelemetry),
mRenderEffect(renderEffect),
mOutputReady(outputReady),
mPaint(paint)
mPaint(paint),
mOutputReadbackMode(ReadOutputReadbackModeFromEnvironment())
{
}
@@ -53,9 +56,22 @@ bool OpenGLRenderPipeline::RenderFrame(const RenderPipelineFrameContext& context
mHealthTelemetry.TryRecordPerformanceStats(state.frameBudgetMilliseconds, renderMilliseconds);
mRuntimeSnapshotProvider.AdvanceFrame();
ReadOutputFrame(state, outputFrame);
if (mPaint)
mPaint();
OutputReadbackTiming readbackTiming = ReadOutputFrame(state, outputFrame);
mHealthTelemetry.TryRecordOutputRenderPipelineTiming(
renderMilliseconds,
readbackTiming.fenceWaitMilliseconds,
readbackTiming.mapMilliseconds,
readbackTiming.copyMilliseconds,
readbackTiming.cachedCopyMilliseconds,
readbackTiming.asyncQueueMilliseconds,
readbackTiming.asyncQueueBufferMilliseconds,
readbackTiming.asyncQueueSetupMilliseconds,
readbackTiming.asyncQueueReadPixelsMilliseconds,
readbackTiming.asyncQueueFenceMilliseconds,
readbackTiming.syncReadMilliseconds,
readbackTiming.asyncReadbackMissed,
readbackTiming.cachedFallbackUsed,
readbackTiming.syncFallbackUsed);
return true;
}
@@ -151,8 +167,9 @@ void OpenGLRenderPipeline::FlushAsyncReadbackPipeline()
mAsyncReadbackReadIndex = 0;
}
void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state)
bool OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state, OutputReadbackTiming& timing)
{
const auto queueStartTime = std::chrono::steady_clock::now();
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
const std::size_t requiredBytes = static_cast<std::size_t>(state.outputFrameRowBytes) * state.outputFrameSize.height;
const GLenum format = usePackedOutput ? GL_RGBA : GL_BGRA;
@@ -161,8 +178,16 @@ void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state)
const GLsizei readWidth = static_cast<GLsizei>(usePackedOutput ? state.outputPackTextureWidth : state.outputFrameSize.width);
const GLsizei readHeight = static_cast<GLsizei>(state.outputFrameSize.height);
const auto finishTiming = [&timing, queueStartTime]() {
const auto queueEndTime = std::chrono::steady_clock::now();
timing.asyncQueueMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(queueEndTime - queueStartTime).count();
};
if (requiredBytes == 0)
return;
{
finishTiming();
return false;
}
if (mAsyncReadbackBytes != requiredBytes
|| mAsyncReadbackFormat != format
@@ -173,30 +198,50 @@ void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state)
mAsyncReadbackType = type;
mAsyncReadbackFramebuffer = framebuffer;
if (!EnsureAsyncReadbackBuffers(requiredBytes))
return;
{
finishTiming();
return false;
}
}
AsyncReadbackSlot& slot = mAsyncReadbackSlots[mAsyncReadbackWriteIndex];
if (slot.fence != nullptr)
if (slot.inFlight)
{
glDeleteSync(slot.fence);
slot.fence = nullptr;
finishTiming();
return false;
}
auto stageStartTime = std::chrono::steady_clock::now();
glPixelStorei(GL_PACK_ALIGNMENT, 4);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
auto stageEndTime = std::chrono::steady_clock::now();
timing.asyncQueueSetupMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
stageStartTime = std::chrono::steady_clock::now();
glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(requiredBytes), nullptr, GL_STREAM_READ);
stageEndTime = std::chrono::steady_clock::now();
timing.asyncQueueBufferMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
stageStartTime = std::chrono::steady_clock::now();
glReadPixels(0, 0, readWidth, readHeight, format, type, nullptr);
stageEndTime = std::chrono::steady_clock::now();
timing.asyncQueueReadPixelsMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
stageStartTime = std::chrono::steady_clock::now();
slot.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
stageEndTime = std::chrono::steady_clock::now();
timing.asyncQueueFenceMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(stageEndTime - stageStartTime).count();
slot.inFlight = slot.fence != nullptr;
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
mAsyncReadbackWriteIndex = (mAsyncReadbackWriteIndex + 1) % mAsyncReadbackSlots.size();
finishTiming();
return slot.inFlight;
}
bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds)
bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds, OutputReadbackTiming& timing)
{
if (mAsyncReadbackBytes == 0 || outputFrame.bytes == nullptr)
return false;
@@ -206,15 +251,24 @@ bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFra
return false;
const GLenum waitFlags = timeoutNanoseconds > 0 ? GL_SYNC_FLUSH_COMMANDS_BIT : 0;
const auto waitStartTime = std::chrono::steady_clock::now();
const GLenum waitResult = glClientWaitSync(slot.fence, waitFlags, timeoutNanoseconds);
const auto waitEndTime = std::chrono::steady_clock::now();
timing.fenceWaitMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(waitEndTime - waitStartTime).count();
if (waitResult != GL_ALREADY_SIGNALED && waitResult != GL_CONDITION_SATISFIED)
{
timing.asyncReadbackMissed = true;
return false;
}
glDeleteSync(slot.fence);
slot.fence = nullptr;
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
const auto mapStartTime = std::chrono::steady_clock::now();
void* mappedBytes = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
const auto mapEndTime = std::chrono::steady_clock::now();
timing.mapMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(mapEndTime - mapStartTime).count();
if (mappedBytes == nullptr)
{
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -223,7 +277,10 @@ bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFra
return false;
}
const auto copyStartTime = std::chrono::steady_clock::now();
std::memcpy(outputFrame.bytes, mappedBytes, slot.sizeBytes);
const auto copyEndTime = std::chrono::steady_clock::now();
timing.copyMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(copyEndTime - copyStartTime).count();
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -243,8 +300,26 @@ void OpenGLRenderPipeline::CacheOutputFrame(const VideoIOOutputFrame& outputFram
std::memcpy(mCachedOutputFrame.data(), outputFrame.bytes, byteCount);
}
void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes)
// Copies the cached output frame into outputFrame.
// Returns false, leaving outputFrame and timing untouched, when the destination has no
// buffer, a zero height or a non-positive row size, or when the cached frame's byte count
// differs from the destination's rowBytes * height.
// On success the duration of the copy is added to timing.cachedCopyMilliseconds and
// timing.cachedFallbackUsed is set.
bool OpenGLRenderPipeline::TryCopyCachedOutputFrame(VideoIOOutputFrame& outputFrame, OutputReadbackTiming& timing) const
{
	using Milliseconds = std::chrono::duration<double, std::milli>;

	if (outputFrame.bytes == nullptr)
		return false;
	if (outputFrame.height == 0 || outputFrame.rowBytes <= 0)
		return false;

	// Only an exact size match is accepted, so the copy never over- or under-runs either buffer.
	const std::size_t requiredBytes = static_cast<std::size_t>(outputFrame.rowBytes) * outputFrame.height;
	if (requiredBytes != mCachedOutputFrame.size())
		return false;

	const auto copyBegan = std::chrono::steady_clock::now();
	std::memcpy(outputFrame.bytes, mCachedOutputFrame.data(), requiredBytes);
	const auto copyElapsed = std::chrono::steady_clock::now() - copyBegan;

	timing.cachedCopyMilliseconds += std::chrono::duration_cast<Milliseconds>(copyElapsed).count();
	timing.cachedFallbackUsed = true;
	return true;
}
void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes, OutputReadbackTiming& timing)
{
const auto readStartTime = std::chrono::steady_clock::now();
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
glPixelStorei(GL_PACK_ALIGNMENT, 4);
@@ -259,24 +334,78 @@ void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& stat
glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputFramebuffer());
glReadPixels(0, 0, state.outputFrameSize.width, state.outputFrameSize.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, destinationBytes);
}
const auto readEndTime = std::chrono::steady_clock::now();
timing.syncReadMilliseconds += std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(readEndTime - readStartTime).count();
timing.syncFallbackUsed = true;
}
void OpenGLRenderPipeline::ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame)
OpenGLRenderPipeline::OutputReadbackTiming OpenGLRenderPipeline::ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame)
{
if (TryConsumeAsyncReadback(outputFrame, 500000))
OutputReadbackTiming timing;
if (mOutputReadbackMode == OutputReadbackMode::Synchronous)
{
QueueAsyncReadback(state);
return;
if (outputFrame.bytes != nullptr)
{
ReadOutputFrameSynchronously(state, outputFrame.bytes, timing);
CacheOutputFrame(outputFrame);
}
return timing;
}
// If async readback misses the playout deadline, prefer a fresh synchronous
// frame over reusing stale cached output, then restart the async pipeline.
if (outputFrame.bytes != nullptr)
if (mOutputReadbackMode == OutputReadbackMode::CachedOnly)
{
ReadOutputFrameSynchronously(state, outputFrame.bytes);
if (TryCopyCachedOutputFrame(outputFrame, timing))
return timing;
if (outputFrame.bytes != nullptr)
{
ReadOutputFrameSynchronously(state, outputFrame.bytes, timing);
CacheOutputFrame(outputFrame);
}
return timing;
}
if (TryConsumeAsyncReadback(outputFrame, 500000, timing))
{
(void)QueueAsyncReadback(state, timing);
return timing;
}
if (TryCopyCachedOutputFrame(outputFrame, timing))
{
(void)QueueAsyncReadback(state, timing);
return timing;
}
// Bootstrap only: until the first async readback has produced cached output,
// use one synchronous readback so DeckLink has a valid frame to schedule.
if (outputFrame.bytes != nullptr && mCachedOutputFrame.empty())
{
ReadOutputFrameSynchronously(state, outputFrame.bytes, timing);
CacheOutputFrame(outputFrame);
}
FlushAsyncReadbackPipeline();
QueueAsyncReadback(state);
(void)QueueAsyncReadback(state, timing);
return timing;
}
// Reads the VST_OUTPUT_READBACK_MODE environment variable and maps it to a readback mode.
// Recognised values are "async_pbo", "sync" and "cached_only" (exact, case-sensitive match).
// An unset, unreadable or unrecognised value selects OutputReadbackMode::AsyncPbo.
OpenGLRenderPipeline::OutputReadbackMode OpenGLRenderPipeline::ReadOutputReadbackModeFromEnvironment()
{
	char* rawValue = nullptr;
	std::size_t rawValueSize = 0;
	const bool lookupFailed = _dupenv_s(&rawValue, &rawValueSize, "VST_OUTPUT_READBACK_MODE") != 0;
	if (lookupFailed || rawValue == nullptr)
		return OutputReadbackMode::AsyncPbo;

	// Take our own copy so the buffer handed back by _dupenv_s can be released immediately.
	const std::string requested(rawValue);
	std::free(rawValue);

	struct NamedMode
	{
		const char* name;
		OutputReadbackMode mode;
	};
	const NamedMode recognisedModes[] = {
		{ "async_pbo", OutputReadbackMode::AsyncPbo },
		{ "sync", OutputReadbackMode::Synchronous },
		{ "cached_only", OutputReadbackMode::CachedOnly },
	};

	for (const NamedMode& candidate : recognisedModes)
	{
		if (requested == candidate.name)
			return candidate.mode;
	}
	return OutputReadbackMode::AsyncPbo;
}

View File

@@ -36,6 +36,13 @@ public:
bool RenderFrame(const RenderPipelineFrameContext& context, VideoIOOutputFrame& outputFrame);
private:
// Strategy used by ReadOutputFrame() to fill the output frame's bytes.
// The active value is picked by ReadOutputReadbackModeFromEnvironment() from the
// VST_OUTPUT_READBACK_MODE environment variable.
enum class OutputReadbackMode
{
// "async_pbo", and the result for unset or unrecognised values: consume a previously
// queued asynchronous readback, then queue the next one.
AsyncPbo,
// "sync": read the frame back synchronously and refresh the cached copy.
Synchronous,
// "cached_only": copy the cached frame when one of matching size exists; otherwise
// read synchronously and cache the result.
CachedOnly
};
struct AsyncReadbackSlot
{
GLuint pixelPackBuffer = 0;
@@ -44,15 +51,34 @@ private:
bool inFlight = false;
};
// Timings (in milliseconds) and outcome flags gathered while reading back one output frame.
// The readback helpers add to the timing fields with += and only ever set the flags to true,
// so one instance can be threaded through several helper calls.
struct OutputReadbackTiming
{
// Time spent in glClientWaitSync while consuming an async readback slot.
double fenceWaitMilliseconds = 0.0;
// Time spent in glMapBuffer on the pixel pack buffer.
double mapMilliseconds = 0.0;
// Time spent copying the mapped buffer into the output frame.
double copyMilliseconds = 0.0;
// Time spent copying the cached output frame into the output frame.
double cachedCopyMilliseconds = 0.0;
// Total time spent inside QueueAsyncReadback, including early exits.
double asyncQueueMilliseconds = 0.0;
// Time spent in glBufferData for the pixel pack buffer.
double asyncQueueBufferMilliseconds = 0.0;
// Time spent on pack-state setup and framebuffer/buffer binding before the read.
double asyncQueueSetupMilliseconds = 0.0;
// Time spent in the glReadPixels call that targets the pixel pack buffer.
double asyncQueueReadPixelsMilliseconds = 0.0;
// Time spent in glFenceSync after the read was issued.
double asyncQueueFenceMilliseconds = 0.0;
// Time spent inside ReadOutputFrameSynchronously.
double syncReadMilliseconds = 0.0;
// Set when the fence wait returned without the fence being signalled.
bool asyncReadbackMissed = false;
// Set when the cached output frame was copied into the output frame.
bool cachedFallbackUsed = false;
// Set whenever ReadOutputFrameSynchronously ran.
bool syncFallbackUsed = false;
};
bool EnsureAsyncReadbackBuffers(std::size_t requiredBytes);
void ResetAsyncReadbackState();
void FlushAsyncReadbackPipeline();
void QueueAsyncReadback(const VideoIOState& state);
bool TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds);
bool QueueAsyncReadback(const VideoIOState& state, OutputReadbackTiming& timing);
bool TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds, OutputReadbackTiming& timing);
void CacheOutputFrame(const VideoIOOutputFrame& outputFrame);
void ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes);
bool TryCopyCachedOutputFrame(VideoIOOutputFrame& outputFrame, OutputReadbackTiming& timing) const;
void ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes, OutputReadbackTiming& timing);
void PackOutputFor10Bit(const VideoIOState& state);
void ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame);
OutputReadbackTiming ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame);
static OutputReadbackMode ReadOutputReadbackModeFromEnvironment();
OpenGLRenderer& mRenderer;
RuntimeSnapshotProvider& mRuntimeSnapshotProvider;
@@ -60,6 +86,7 @@ private:
RenderEffectCallback mRenderEffect;
OutputReadyCallback mOutputReady;
PaintCallback mPaint;
OutputReadbackMode mOutputReadbackMode = OutputReadbackMode::AsyncPbo;
std::array<AsyncReadbackSlot, 3> mAsyncReadbackSlots;
std::size_t mAsyncReadbackWriteIndex = 0;
std::size_t mAsyncReadbackReadIndex = 0;

View File

@@ -95,6 +95,24 @@ JsonValue RuntimeStatePresenter::BuildRuntimeStateValue(const RuntimeStore& runt
outputRender.set("renderMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderMilliseconds));
outputRender.set("smoothedRenderMs", JsonValue(telemetrySnapshot.backendPlayout.smoothedOutputRenderMilliseconds));
outputRender.set("maxRenderMs", JsonValue(telemetrySnapshot.backendPlayout.maxOutputRenderMilliseconds));
outputRender.set("acquireFrameMs", JsonValue(telemetrySnapshot.backendPlayout.outputFrameAcquireMilliseconds));
outputRender.set("renderRequestMs", JsonValue(telemetrySnapshot.backendPlayout.outputFrameRenderRequestMilliseconds));
outputRender.set("endAccessMs", JsonValue(telemetrySnapshot.backendPlayout.outputFrameEndAccessMilliseconds));
outputRender.set("queueWaitMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderQueueWaitMilliseconds));
outputRender.set("drawMs", JsonValue(telemetrySnapshot.backendPlayout.outputRenderDrawMilliseconds));
outputRender.set("fenceWaitMs", JsonValue(telemetrySnapshot.backendPlayout.outputReadbackFenceWaitMilliseconds));
outputRender.set("mapMs", JsonValue(telemetrySnapshot.backendPlayout.outputReadbackMapMilliseconds));
outputRender.set("readbackCopyMs", JsonValue(telemetrySnapshot.backendPlayout.outputReadbackCopyMilliseconds));
outputRender.set("cachedCopyMs", JsonValue(telemetrySnapshot.backendPlayout.outputCachedCopyMilliseconds));
outputRender.set("asyncQueueMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueMilliseconds));
outputRender.set("asyncQueueBufferMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueBufferMilliseconds));
outputRender.set("asyncQueueSetupMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueSetupMilliseconds));
outputRender.set("asyncQueueReadPixelsMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueReadPixelsMilliseconds));
outputRender.set("asyncQueueFenceMs", JsonValue(telemetrySnapshot.backendPlayout.outputAsyncQueueFenceMilliseconds));
outputRender.set("syncReadMs", JsonValue(telemetrySnapshot.backendPlayout.outputSyncReadMilliseconds));
outputRender.set("asyncReadbackMissCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.outputAsyncReadbackMissCount)));
outputRender.set("cachedFallbackCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.outputCachedFallbackCount)));
outputRender.set("syncFallbackCount", JsonValue(static_cast<double>(telemetrySnapshot.backendPlayout.outputSyncFallbackCount)));
JsonValue recovery = JsonValue::MakeObject();
recovery.set("completionResult", JsonValue(telemetrySnapshot.backendPlayout.completionResult));

View File

@@ -212,6 +212,7 @@ void HealthTelemetry::RecordBackendPlayoutHealth(const std::string& lifecycleSta
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
@@ -232,6 +233,9 @@ void HealthTelemetry::RecordBackendPlayoutHealth(const std::string& lifecycleSta
mBackendPlayout.outputRenderMilliseconds = std::max(outputRenderMilliseconds, 0.0);
mBackendPlayout.smoothedOutputRenderMilliseconds = std::max(smoothedOutputRenderMilliseconds, 0.0);
mBackendPlayout.maxOutputRenderMilliseconds = std::max(maxOutputRenderMilliseconds, 0.0);
mBackendPlayout.outputFrameAcquireMilliseconds = std::max(outputFrameAcquireMilliseconds, 0.0);
mBackendPlayout.outputFrameRenderRequestMilliseconds = std::max(outputFrameRenderRequestMilliseconds, 0.0);
mBackendPlayout.outputFrameEndAccessMilliseconds = std::max(outputFrameEndAccessMilliseconds, 0.0);
mBackendPlayout.completedFrameIndex = completedFrameIndex;
mBackendPlayout.scheduledFrameIndex = scheduledFrameIndex;
mBackendPlayout.scheduledLeadFrames = scheduledLeadFrames;
@@ -251,6 +255,7 @@ bool HealthTelemetry::TryRecordBackendPlayoutHealth(const std::string& lifecycle
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
@@ -274,6 +279,9 @@ bool HealthTelemetry::TryRecordBackendPlayoutHealth(const std::string& lifecycle
mBackendPlayout.outputRenderMilliseconds = std::max(outputRenderMilliseconds, 0.0);
mBackendPlayout.smoothedOutputRenderMilliseconds = std::max(smoothedOutputRenderMilliseconds, 0.0);
mBackendPlayout.maxOutputRenderMilliseconds = std::max(maxOutputRenderMilliseconds, 0.0);
mBackendPlayout.outputFrameAcquireMilliseconds = std::max(outputFrameAcquireMilliseconds, 0.0);
mBackendPlayout.outputFrameRenderRequestMilliseconds = std::max(outputFrameRenderRequestMilliseconds, 0.0);
mBackendPlayout.outputFrameEndAccessMilliseconds = std::max(outputFrameEndAccessMilliseconds, 0.0);
mBackendPlayout.completedFrameIndex = completedFrameIndex;
mBackendPlayout.scheduledFrameIndex = scheduledFrameIndex;
mBackendPlayout.scheduledLeadFrames = scheduledLeadFrames;
@@ -289,6 +297,98 @@ bool HealthTelemetry::TryRecordBackendPlayoutHealth(const std::string& lifecycle
return true;
}
// Stores the latest output-render queue wait, blocking until the telemetry mutex is held.
// Negative values are recorded as 0.
void HealthTelemetry::RecordOutputRenderQueueWait(double queueWaitMilliseconds)
{
	const double clampedWait = std::max(queueWaitMilliseconds, 0.0);

	std::lock_guard<std::mutex> lock(mMutex);
	mBackendPlayout.outputRenderQueueWaitMilliseconds = clampedWait;
}
// Non-blocking variant of RecordOutputRenderQueueWait.
// Returns false without recording anything when the telemetry mutex cannot be taken
// immediately; otherwise stores the wait (negative values recorded as 0) and returns true.
bool HealthTelemetry::TryRecordOutputRenderQueueWait(double queueWaitMilliseconds)
{
	std::unique_lock<std::mutex> lock(mMutex, std::try_to_lock);
	const bool acquired = lock.owns_lock();
	if (acquired)
		mBackendPlayout.outputRenderQueueWaitMilliseconds = std::max(queueWaitMilliseconds, 0.0);
	return acquired;
}
// Records the per-stage timings of the latest output render/readback pass, blocking until
// the telemetry mutex is held.
// Every millisecond value overwrites the previous sample, with negative inputs stored as 0.
// Each of the three flags, when true, increments its matching running counter
// (async readback misses, cached fallbacks, synchronous fallbacks).
void HealthTelemetry::RecordOutputRenderPipelineTiming(
	double drawMilliseconds,
	double fenceWaitMilliseconds,
	double mapMilliseconds,
	double readbackCopyMilliseconds,
	double cachedCopyMilliseconds,
	double asyncQueueMilliseconds,
	double asyncQueueBufferMilliseconds,
	double asyncQueueSetupMilliseconds,
	double asyncQueueReadPixelsMilliseconds,
	double asyncQueueFenceMilliseconds,
	double syncReadMilliseconds,
	bool asyncReadbackMissed,
	bool cachedFallbackUsed,
	bool syncFallbackUsed)
{
	const auto nonNegative = [](double value) { return std::max(value, 0.0); };

	std::lock_guard<std::mutex> lock(mMutex);
	auto& playout = mBackendPlayout;

	playout.outputRenderDrawMilliseconds = nonNegative(drawMilliseconds);
	playout.outputReadbackFenceWaitMilliseconds = nonNegative(fenceWaitMilliseconds);
	playout.outputReadbackMapMilliseconds = nonNegative(mapMilliseconds);
	playout.outputReadbackCopyMilliseconds = nonNegative(readbackCopyMilliseconds);
	playout.outputCachedCopyMilliseconds = nonNegative(cachedCopyMilliseconds);
	playout.outputAsyncQueueMilliseconds = nonNegative(asyncQueueMilliseconds);
	playout.outputAsyncQueueBufferMilliseconds = nonNegative(asyncQueueBufferMilliseconds);
	playout.outputAsyncQueueSetupMilliseconds = nonNegative(asyncQueueSetupMilliseconds);
	playout.outputAsyncQueueReadPixelsMilliseconds = nonNegative(asyncQueueReadPixelsMilliseconds);
	playout.outputAsyncQueueFenceMilliseconds = nonNegative(asyncQueueFenceMilliseconds);
	playout.outputSyncReadMilliseconds = nonNegative(syncReadMilliseconds);

	// The counters are cumulative, unlike the timings above which hold the latest sample.
	playout.outputAsyncReadbackMissCount += asyncReadbackMissed ? 1 : 0;
	playout.outputCachedFallbackCount += cachedFallbackUsed ? 1 : 0;
	playout.outputSyncFallbackCount += syncFallbackUsed ? 1 : 0;
}
// Non-blocking variant of RecordOutputRenderPipelineTiming.
// Returns false and records nothing (the counters are not incremented either) when the
// telemetry mutex cannot be taken immediately. Otherwise it overwrites every millisecond
// sample (negative inputs stored as 0), increments the counter matching each true flag,
// and returns true.
bool HealthTelemetry::TryRecordOutputRenderPipelineTiming(
	double drawMilliseconds,
	double fenceWaitMilliseconds,
	double mapMilliseconds,
	double readbackCopyMilliseconds,
	double cachedCopyMilliseconds,
	double asyncQueueMilliseconds,
	double asyncQueueBufferMilliseconds,
	double asyncQueueSetupMilliseconds,
	double asyncQueueReadPixelsMilliseconds,
	double asyncQueueFenceMilliseconds,
	double syncReadMilliseconds,
	bool asyncReadbackMissed,
	bool cachedFallbackUsed,
	bool syncFallbackUsed)
{
	std::unique_lock<std::mutex> lock(mMutex, std::try_to_lock);
	if (!lock.owns_lock())
		return false;

	const auto nonNegative = [](double value) { return std::max(value, 0.0); };
	auto& playout = mBackendPlayout;

	playout.outputRenderDrawMilliseconds = nonNegative(drawMilliseconds);
	playout.outputReadbackFenceWaitMilliseconds = nonNegative(fenceWaitMilliseconds);
	playout.outputReadbackMapMilliseconds = nonNegative(mapMilliseconds);
	playout.outputReadbackCopyMilliseconds = nonNegative(readbackCopyMilliseconds);
	playout.outputCachedCopyMilliseconds = nonNegative(cachedCopyMilliseconds);
	playout.outputAsyncQueueMilliseconds = nonNegative(asyncQueueMilliseconds);
	playout.outputAsyncQueueBufferMilliseconds = nonNegative(asyncQueueBufferMilliseconds);
	playout.outputAsyncQueueSetupMilliseconds = nonNegative(asyncQueueSetupMilliseconds);
	playout.outputAsyncQueueReadPixelsMilliseconds = nonNegative(asyncQueueReadPixelsMilliseconds);
	playout.outputAsyncQueueFenceMilliseconds = nonNegative(asyncQueueFenceMilliseconds);
	playout.outputSyncReadMilliseconds = nonNegative(syncReadMilliseconds);

	// The counters are cumulative, unlike the timings above which hold the latest sample.
	playout.outputAsyncReadbackMissCount += asyncReadbackMissed ? 1 : 0;
	playout.outputCachedFallbackCount += cachedFallbackUsed ? 1 : 0;
	playout.outputSyncFallbackCount += syncFallbackUsed ? 1 : 0;
	return true;
}
HealthTelemetry::SignalStatusSnapshot HealthTelemetry::GetSignalStatusSnapshot() const
{
std::lock_guard<std::mutex> lock(mMutex);

View File

@@ -97,6 +97,24 @@ public:
double outputRenderMilliseconds = 0.0;
double smoothedOutputRenderMilliseconds = 0.0;
double maxOutputRenderMilliseconds = 0.0;
double outputFrameAcquireMilliseconds = 0.0;
double outputFrameRenderRequestMilliseconds = 0.0;
double outputFrameEndAccessMilliseconds = 0.0;
double outputRenderQueueWaitMilliseconds = 0.0;
double outputRenderDrawMilliseconds = 0.0;
double outputReadbackFenceWaitMilliseconds = 0.0;
double outputReadbackMapMilliseconds = 0.0;
double outputReadbackCopyMilliseconds = 0.0;
double outputCachedCopyMilliseconds = 0.0;
double outputAsyncQueueMilliseconds = 0.0;
double outputAsyncQueueBufferMilliseconds = 0.0;
double outputAsyncQueueSetupMilliseconds = 0.0;
double outputAsyncQueueReadPixelsMilliseconds = 0.0;
double outputAsyncQueueFenceMilliseconds = 0.0;
double outputSyncReadMilliseconds = 0.0;
uint64_t outputAsyncReadbackMissCount = 0;
uint64_t outputCachedFallbackCount = 0;
uint64_t outputSyncFallbackCount = 0;
uint64_t completedFrameIndex = 0;
uint64_t scheduledFrameIndex = 0;
uint64_t scheduledLeadFrames = 0;
@@ -161,6 +179,7 @@ public:
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
@@ -170,11 +189,46 @@ public:
std::size_t minReadyQueueDepth, std::size_t maxReadyQueueDepth, uint64_t readyQueueZeroDepthCount,
uint64_t readyQueuePoppedCount, uint64_t readyQueueDroppedCount, uint64_t readyQueueUnderrunCount,
double outputRenderMilliseconds, double smoothedOutputRenderMilliseconds, double maxOutputRenderMilliseconds,
double outputFrameAcquireMilliseconds, double outputFrameRenderRequestMilliseconds, double outputFrameEndAccessMilliseconds,
uint64_t completedFrameIndex, uint64_t scheduledFrameIndex, uint64_t scheduledLeadFrames,
uint64_t measuredLagFrames, uint64_t catchUpFrames, uint64_t lateStreak, uint64_t dropStreak,
uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount,
bool degraded, const std::string& statusMessage);
void RecordOutputRenderQueueWait(double queueWaitMilliseconds);
bool TryRecordOutputRenderQueueWait(double queueWaitMilliseconds);
void RecordOutputRenderPipelineTiming(
double drawMilliseconds,
double fenceWaitMilliseconds,
double mapMilliseconds,
double readbackCopyMilliseconds,
double cachedCopyMilliseconds,
double asyncQueueMilliseconds,
double asyncQueueBufferMilliseconds,
double asyncQueueSetupMilliseconds,
double asyncQueueReadPixelsMilliseconds,
double asyncQueueFenceMilliseconds,
double syncReadMilliseconds,
bool asyncReadbackMissed,
bool cachedFallbackUsed,
bool syncFallbackUsed);
bool TryRecordOutputRenderPipelineTiming(
double drawMilliseconds,
double fenceWaitMilliseconds,
double mapMilliseconds,
double readbackCopyMilliseconds,
double cachedCopyMilliseconds,
double asyncQueueMilliseconds,
double asyncQueueBufferMilliseconds,
double asyncQueueSetupMilliseconds,
double asyncQueueReadPixelsMilliseconds,
double asyncQueueFenceMilliseconds,
double syncReadMilliseconds,
bool asyncReadbackMissed,
bool cachedFallbackUsed,
bool syncFallbackUsed);
SignalStatusSnapshot GetSignalStatusSnapshot() const;
VideoIOStatusSnapshot GetVideoIOStatusSnapshot() const;
PerformanceSnapshot GetPerformanceSnapshot() const;

View File

@@ -9,12 +9,14 @@
#include <algorithm>
#include <chrono>
#include <cstring>
#include <cmath>
#include <windows.h>
VideoBackend::VideoBackend(RenderEngine& renderEngine, HealthTelemetry& healthTelemetry, RuntimeEventDispatcher& runtimeEventDispatcher) :
mHealthTelemetry(healthTelemetry),
mRuntimeEventDispatcher(runtimeEventDispatcher),
mPlayoutPolicy(NormalizeVideoPlayoutPolicy(VideoPlayoutPolicy())),
mOutputProductionController(mPlayoutPolicy),
mReadyOutputQueue(mPlayoutPolicy),
mVideoIODevice(std::make_unique<DeckLinkSession>()),
mBridge(std::make_unique<OpenGLVideoIOBridge>(renderEngine))
@@ -80,6 +82,7 @@ bool VideoBackend::ConfigureInput(const VideoFormat& inputVideoMode, std::string
bool VideoBackend::ConfigureOutput(const VideoFormat& outputVideoMode, bool externalKeyingEnabled, std::string& error)
{
mPlayoutPolicy = NormalizeVideoPlayoutPolicy(mPlayoutPolicy);
mOutputProductionController.Configure(mPlayoutPolicy);
mReadyOutputQueue.Configure(mPlayoutPolicy);
if (mLifecycle.State() != VideoBackendLifecycleState::Configuring)
ApplyLifecycleTransition(VideoBackendLifecycleState::Configuring, "Configuring video backend output.");
@@ -101,7 +104,10 @@ bool VideoBackend::Start()
StartOutputCompletionWorker();
const bool started = mVideoIODevice->Start();
if (started)
{
StartOutputProducerWorker();
ApplyLifecycleTransition(VideoBackendLifecycleState::Running, "Video backend started.");
}
else
{
StopOutputCompletionWorker();
@@ -113,6 +119,7 @@ bool VideoBackend::Start()
bool VideoBackend::Stop()
{
ApplyLifecycleTransition(VideoBackendLifecycleState::Stopping, "Video backend stopping.");
StopOutputProducerWorker();
const bool stopped = mVideoIODevice->Stop();
StopOutputCompletionWorker();
if (stopped)
@@ -232,6 +239,12 @@ const std::string& VideoBackend::StatusMessage() const
return mVideoIODevice->StatusMessage();
}
bool VideoBackend::ShouldPrioritizeOutputOverPreview() const
{
const RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
return metrics.depth < static_cast<std::size_t>(mPlayoutPolicy.targetReadyFrames);
}
void VideoBackend::SetStatusMessage(const std::string& message)
{
mVideoIODevice->SetStatusMessage(message);
@@ -288,27 +301,36 @@ void VideoBackend::HandleOutputFrameCompletion(const VideoIOCompletion& completi
// Resets the per-run completion/queue/metric state and launches the output
// completion worker thread. Does nothing when the worker is already running.
//
// The mutex is taken exactly once and the worker thread is created exactly
// once: locking mOutputCompletionMutex a second time on the same thread is
// undefined for std::mutex, and assigning a new std::thread over a joinable
// one calls std::terminate.
void VideoBackend::StartOutputCompletionWorker()
{
	std::lock_guard<std::mutex> lock(mOutputCompletionMutex);
	if (mOutputCompletionWorkerRunning)
		return;

	// Queue state.
	mPendingOutputCompletions.clear();
	mReadyOutputQueue.Clear();
	mNextReadyOutputFrameIndex = 0;

	// Ready-queue depth statistics.
	mHasReadyQueueDepthBaseline = false;
	mMinReadyQueueDepth = 0;
	mMaxReadyQueueDepth = 0;
	mReadyQueueZeroDepthCount = 0;

	// Render timing statistics.
	mOutputRenderMilliseconds = 0.0;
	mSmoothedOutputRenderMilliseconds = 0.0;
	mMaxOutputRenderMilliseconds = 0.0;
	mOutputFrameAcquireMilliseconds = 0.0;
	mOutputFrameRenderRequestMilliseconds = 0.0;
	mOutputFrameEndAccessMilliseconds = 0.0;

	// Late/drop streaks from the last completion.
	mLastLateStreak = 0;
	mLastDropStreak = 0;

	// Flags are set before the thread is created so the worker observes a
	// consistent running/stopping state once it acquires the mutex.
	mOutputCompletionWorkerStopping = false;
	mOutputCompletionWorkerRunning = true;
	mOutputCompletionWorker = std::thread(&VideoBackend::OutputCompletionWorkerMain, this);
}
void VideoBackend::StopOutputCompletionWorker()
{
StopOutputProducerWorker();
bool shouldJoin = false;
{
std::lock_guard<std::mutex> lock(mOutputCompletionMutex);
@@ -322,6 +344,40 @@ void VideoBackend::StopOutputCompletionWorker()
mOutputCompletionWorker.join();
}
void VideoBackend::StartOutputProducerWorker()
{
std::lock_guard<std::mutex> lock(mOutputProducerMutex);
if (mOutputProducerWorkerRunning)
return;
mLastOutputProductionCompletion = VideoIOCompletion();
mLastOutputProductionTime = std::chrono::steady_clock::time_point();
mOutputProducerWorkerStopping = false;
mOutputProducerWorkerRunning = true;
mOutputProducerWorker = std::thread(&VideoBackend::OutputProducerWorkerMain, this);
mOutputProducerCondition.notify_one();
}
void VideoBackend::StopOutputProducerWorker()
{
bool shouldJoin = false;
{
std::lock_guard<std::mutex> lock(mOutputProducerMutex);
if (mOutputProducerWorkerRunning)
mOutputProducerWorkerStopping = true;
shouldJoin = mOutputProducerWorker.joinable();
}
mOutputProducerCondition.notify_one();
if (shouldJoin)
mOutputProducerWorker.join();
}
// Signals mOutputProducerCondition once, waking one thread waiting on it
// (if any).
void VideoBackend::NotifyOutputProducer()
{
mOutputProducerCondition.notify_one();
}
void VideoBackend::OutputCompletionWorkerMain()
{
for (;;)
@@ -351,6 +407,57 @@ void VideoBackend::OutputCompletionWorkerMain()
}
}
// Thread entry point for the output producer worker.
//
// Each pass waits on mOutputProducerCondition for at most
// OutputProducerWakeInterval(), samples the ready-queue depth, asks
// mOutputProductionController whether to produce, and, if so, renders at most
// one frame for the most recently recorded completion. Successful productions
// are spaced at least one wake interval apart. The loop exits (clearing
// mOutputProducerWorkerRunning) once mOutputProducerWorkerStopping is set.
void VideoBackend::OutputProducerWorkerMain()
{
	for (;;)
	{
		{
			std::unique_lock<std::mutex> lock(mOutputProducerMutex);
			// A stop request raised while this thread was busy outside the wait
			// has already delivered its notification to nobody. Check the flag
			// before sleeping so shutdown is not delayed by a full wake interval.
			if (!mOutputProducerWorkerStopping)
				mOutputProducerCondition.wait_for(lock, OutputProducerWakeInterval());
			if (mOutputProducerWorkerStopping)
			{
				mOutputProducerWorkerRunning = false;
				return;
			}
		}

		const RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
		RecordReadyQueueDepthSample(metrics);
		const OutputProductionDecision decision = mOutputProductionController.Decide(BuildOutputProductionPressure(metrics));
		if (decision.action != OutputProductionAction::Produce || decision.requestedFrames == 0)
			continue;

		VideoIOCompletion completion;
		{
			std::lock_guard<std::mutex> lock(mOutputProducerMutex);
			// Loop back to the top, where the stop flag ends the thread.
			if (mOutputProducerWorkerStopping)
				continue;
			completion = mLastOutputProductionCompletion;
		}

		// Throttle: skip this pass when the previous successful production was
		// less than one wake interval ago.
		const auto now = std::chrono::steady_clock::now();
		if (mLastOutputProductionTime != std::chrono::steady_clock::time_point() &&
			now - mLastOutputProductionTime < OutputProducerWakeInterval())
		{
			continue;
		}

		if (ProduceReadyOutputFrames(completion, 1) > 0)
			mLastOutputProductionTime = std::chrono::steady_clock::now();
	}
}
// Computes how long the producer worker waits between passes: 75% of the
// current frame budget, rounded down to whole milliseconds and never less
// than 1 ms. Falls back to 8 ms when the frame budget is zero or negative.
std::chrono::milliseconds VideoBackend::OutputProducerWakeInterval() const
{
	const double budgetMilliseconds = State().frameBudgetMilliseconds;
	if (budgetMilliseconds <= 0.0)
		return std::chrono::milliseconds(8);

	const int scaledMilliseconds = static_cast<int>(std::floor(budgetMilliseconds * 0.75));
	return std::chrono::milliseconds(scaledMilliseconds < 1 ? 1 : scaledMilliseconds);
}
void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& completion)
{
RecordFramePacing(completion.result);
@@ -358,31 +465,69 @@ void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& complet
const RenderOutputQueueMetrics initialQueueMetrics = mReadyOutputQueue.GetMetrics();
RecordReadyQueueDepthSample(initialQueueMetrics);
const VideoPlayoutRecoveryDecision recoveryDecision = AccountForCompletionResult(completion.result, initialQueueMetrics.depth);
{
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
mLastLateStreak = recoveryDecision.lateStreak;
mLastDropStreak = recoveryDecision.dropStreak;
}
{
std::lock_guard<std::mutex> lock(mOutputProducerMutex);
mLastOutputProductionCompletion = completion;
}
NotifyOutputProducer();
FillReadyOutputQueue(completion);
if (!ScheduleReadyOutputFrame())
if (!ScheduleReadyOutputFrame() &&
(ProduceReadyOutputFrames(completion, 1) == 0 || !ScheduleReadyOutputFrame()))
{
ScheduleBlackUnderrunFrame();
}
NotifyOutputProducer();
RecordBackendPlayoutHealth(completion.result, recoveryDecision);
}
void VideoBackend::RecordBackendPlayoutHealth(VideoIOCompletionResult result, const VideoPlayoutRecoveryDecision& recoveryDecision)
{
const RenderOutputQueueMetrics queueMetrics = mReadyOutputQueue.GetMetrics();
std::size_t minReadyQueueDepth = 0;
std::size_t maxReadyQueueDepth = 0;
uint64_t readyQueueZeroDepthCount = 0;
double outputRenderMilliseconds = 0.0;
double smoothedOutputRenderMilliseconds = 0.0;
double maxOutputRenderMilliseconds = 0.0;
double outputFrameAcquireMilliseconds = 0.0;
double outputFrameRenderRequestMilliseconds = 0.0;
double outputFrameEndAccessMilliseconds = 0.0;
{
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
minReadyQueueDepth = mMinReadyQueueDepth;
maxReadyQueueDepth = mMaxReadyQueueDepth;
readyQueueZeroDepthCount = mReadyQueueZeroDepthCount;
outputRenderMilliseconds = mOutputRenderMilliseconds;
smoothedOutputRenderMilliseconds = mSmoothedOutputRenderMilliseconds;
maxOutputRenderMilliseconds = mMaxOutputRenderMilliseconds;
outputFrameAcquireMilliseconds = mOutputFrameAcquireMilliseconds;
outputFrameRenderRequestMilliseconds = mOutputFrameRenderRequestMilliseconds;
outputFrameEndAccessMilliseconds = mOutputFrameEndAccessMilliseconds;
}
mHealthTelemetry.TryRecordBackendPlayoutHealth(
VideoBackendLifecycle::StateName(mLifecycle.State()),
CompletionResultName(result),
queueMetrics.depth,
queueMetrics.capacity,
queueMetrics.pushedCount,
mMinReadyQueueDepth,
mMaxReadyQueueDepth,
mReadyQueueZeroDepthCount,
minReadyQueueDepth,
maxReadyQueueDepth,
readyQueueZeroDepthCount,
queueMetrics.poppedCount,
queueMetrics.droppedCount,
queueMetrics.underrunCount,
mOutputRenderMilliseconds,
mSmoothedOutputRenderMilliseconds,
mMaxOutputRenderMilliseconds,
outputRenderMilliseconds,
smoothedOutputRenderMilliseconds,
maxOutputRenderMilliseconds,
outputFrameAcquireMilliseconds,
outputFrameRenderRequestMilliseconds,
outputFrameEndAccessMilliseconds,
recoveryDecision.completedFrameIndex,
recoveryDecision.scheduledFrameIndex,
recoveryDecision.scheduledLeadFrames,
@@ -397,50 +542,93 @@ void VideoBackend::RecordBackendPlayoutHealth(VideoIOCompletionResult result, co
StatusMessage());
}
bool VideoBackend::FillReadyOutputQueue(const VideoIOCompletion& completion)
// Renders up to maxFrames frames into the ready-output queue and returns how
// many were actually produced.
//
// Production is serialized by mOutputProductionMutex. Before every frame the
// production controller is consulted with the current queue pressure; the loop
// stops as soon as it declines to produce or a frame fails to render. The
// queue depth is re-sampled after each produced frame.
//
// completion: completion record forwarded to RenderReadyOutputFrame.
// maxFrames:  upper bound on frames to produce; 0 returns immediately.
std::size_t VideoBackend::ProduceReadyOutputFrames(const VideoIOCompletion& completion, std::size_t maxFrames)
{
	if (maxFrames == 0)
		return 0;

	std::lock_guard<std::mutex> productionLock(mOutputProductionMutex);
	RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
	std::size_t producedFrames = 0;
	while (producedFrames < maxFrames)
	{
		const OutputProductionDecision decision = mOutputProductionController.Decide(BuildOutputProductionPressure(metrics));
		if (decision.action != OutputProductionAction::Produce)
			break;

		if (!RenderReadyOutputFrame(mVideoIODevice->State(), completion))
			break;

		++producedFrames;
		metrics = mReadyOutputQueue.GetMetrics();
		RecordReadyQueueDepthSample(metrics);
	}

	return producedFrames;
}
// Assembles the pressure snapshot handed to the production controller: queue
// depth, capacity and underrun count come from the supplied metrics, while the
// late/drop streaks are copied from the members guarded by mOutputMetricsMutex.
OutputProductionPressure VideoBackend::BuildOutputProductionPressure(const RenderOutputQueueMetrics& metrics) const
{
	OutputProductionPressure snapshot;

	{
		std::lock_guard<std::mutex> metricsGuard(mOutputMetricsMutex);
		snapshot.lateStreak = mLastLateStreak;
		snapshot.dropStreak = mLastDropStreak;
	}

	snapshot.readyQueueUnderrunCount = metrics.underrunCount;
	snapshot.readyQueueCapacity = metrics.capacity;
	snapshot.readyQueueDepth = metrics.depth;
	return snapshot;
}
bool VideoBackend::RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion)
{
const auto renderStart = std::chrono::steady_clock::now();
VideoIOOutputFrame outputFrame;
const auto acquireStart = std::chrono::steady_clock::now();
if (!BeginOutputFrame(outputFrame))
return false;
const auto acquireEnd = std::chrono::steady_clock::now();
bool rendered = true;
const auto renderRequestStart = std::chrono::steady_clock::now();
if (mBridge)
rendered = mBridge->RenderScheduledFrame(state, completion, outputFrame);
const auto renderRequestEnd = std::chrono::steady_clock::now();
const auto endAccessStart = std::chrono::steady_clock::now();
EndOutputFrame(outputFrame);
const auto endAccessEnd = std::chrono::steady_clock::now();
const double acquireMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(acquireEnd - acquireStart).count();
const double renderRequestMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(renderRequestEnd - renderRequestStart).count();
const double endAccessMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(endAccessEnd - endAccessStart).count();
if (!rendered)
{
ApplyLifecycleTransition(VideoBackendLifecycleState::Degraded, "Output frame render request failed; skipping schedule for this frame.");
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
std::chrono::steady_clock::now() - renderStart).count();
RecordOutputRenderDuration(renderMilliseconds);
RecordOutputRenderDuration(renderMilliseconds, acquireMilliseconds, renderRequestMilliseconds, endAccessMilliseconds);
if (outputFrame.nativeFrame != nullptr)
{
static_cast<IUnknown*>(outputFrame.nativeFrame)->Release();
outputFrame.nativeFrame = nullptr;
}
return false;
}
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
std::chrono::steady_clock::now() - renderStart).count();
RecordOutputRenderDuration(renderMilliseconds);
RecordOutputRenderDuration(renderMilliseconds, acquireMilliseconds, renderRequestMilliseconds, endAccessMilliseconds);
RenderOutputFrame readyFrame;
readyFrame.frame = outputFrame;
readyFrame.frameIndex = ++mNextReadyOutputFrameIndex;
return mReadyOutputQueue.Push(readyFrame);
const bool pushed = mReadyOutputQueue.Push(readyFrame);
if (!pushed && outputFrame.nativeFrame != nullptr)
{
static_cast<IUnknown*>(outputFrame.nativeFrame)->Release();
outputFrame.nativeFrame = nullptr;
}
return pushed;
}
bool VideoBackend::ScheduleReadyOutputFrame()
@@ -516,6 +704,7 @@ void VideoBackend::RecordFramePacing(VideoIOCompletionResult completionResult)
void VideoBackend::RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics)
{
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
if (!mHasReadyQueueDepthBaseline)
{
mHasReadyQueueDepthBaseline = true;
@@ -532,14 +721,18 @@ void VideoBackend::RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& m
++mReadyQueueZeroDepthCount;
}
void VideoBackend::RecordOutputRenderDuration(double renderMilliseconds)
void VideoBackend::RecordOutputRenderDuration(double renderMilliseconds, double acquireMilliseconds, double renderRequestMilliseconds, double endAccessMilliseconds)
{
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
mOutputRenderMilliseconds = (std::max)(renderMilliseconds, 0.0);
if (mSmoothedOutputRenderMilliseconds <= 0.0)
mSmoothedOutputRenderMilliseconds = mOutputRenderMilliseconds;
else
mSmoothedOutputRenderMilliseconds = mSmoothedOutputRenderMilliseconds * 0.9 + mOutputRenderMilliseconds * 0.1;
mMaxOutputRenderMilliseconds = (std::max)(mMaxOutputRenderMilliseconds, mOutputRenderMilliseconds);
mOutputFrameAcquireMilliseconds = (std::max)(acquireMilliseconds, 0.0);
mOutputFrameRenderRequestMilliseconds = (std::max)(renderRequestMilliseconds, 0.0);
mOutputFrameEndAccessMilliseconds = (std::max)(endAccessMilliseconds, 0.0);
PublishTimingSample("VideoBackend", "outputRender", mOutputRenderMilliseconds, "ms");
PublishTimingSample("VideoBackend", "smoothedOutputRender", mSmoothedOutputRenderMilliseconds, "ms");

View File

@@ -1,5 +1,6 @@
#pragma once
#include "OutputProductionController.h"
#include "RenderOutputQueue.h"
#include "VideoBackendLifecycle.h"
#include "VideoIOTypes.h"
@@ -59,6 +60,7 @@ public:
bool KeyerInterfaceAvailable() const;
bool ExternalKeyingActive() const;
const std::string& StatusMessage() const;
bool ShouldPrioritizeOutputOverPreview() const;
void SetStatusMessage(const std::string& message);
void PublishStatus(bool externalKeyingConfigured, const std::string& statusMessage = std::string());
void ReportNoInputDeviceSignalStatus();
@@ -69,14 +71,20 @@ private:
void StartOutputCompletionWorker();
void StopOutputCompletionWorker();
void OutputCompletionWorkerMain();
void StartOutputProducerWorker();
void StopOutputProducerWorker();
void OutputProducerWorkerMain();
void NotifyOutputProducer();
std::chrono::milliseconds OutputProducerWakeInterval() const;
void ProcessOutputFrameCompletion(const VideoIOCompletion& completion);
bool FillReadyOutputQueue(const VideoIOCompletion& completion);
std::size_t ProduceReadyOutputFrames(const VideoIOCompletion& completion, std::size_t maxFrames);
OutputProductionPressure BuildOutputProductionPressure(const RenderOutputQueueMetrics& metrics) const;
bool RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion);
bool ScheduleReadyOutputFrame();
bool ScheduleBlackUnderrunFrame();
void RecordFramePacing(VideoIOCompletionResult completionResult);
void RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics);
void RecordOutputRenderDuration(double renderMilliseconds);
void RecordOutputRenderDuration(double renderMilliseconds, double acquireMilliseconds, double renderRequestMilliseconds, double endAccessMilliseconds);
bool ApplyLifecycleTransition(VideoBackendLifecycleState state, const std::string& message);
bool ApplyLifecycleFailure(const std::string& message);
void PublishBackendStateChanged(const std::string& state, const std::string& message);
@@ -92,6 +100,7 @@ private:
RuntimeEventDispatcher& mRuntimeEventDispatcher;
VideoBackendLifecycle mLifecycle;
VideoPlayoutPolicy mPlayoutPolicy;
OutputProductionController mOutputProductionController;
RenderOutputQueue mReadyOutputQueue;
std::unique_ptr<VideoIODevice> mVideoIODevice;
std::unique_ptr<OpenGLVideoIOBridge> mBridge;
@@ -99,8 +108,17 @@ private:
std::condition_variable mOutputCompletionCondition;
std::deque<VideoIOCompletion> mPendingOutputCompletions;
std::thread mOutputCompletionWorker;
std::mutex mOutputProducerMutex;
std::condition_variable mOutputProducerCondition;
std::thread mOutputProducerWorker;
VideoIOCompletion mLastOutputProductionCompletion;
std::chrono::steady_clock::time_point mLastOutputProductionTime;
std::mutex mOutputProductionMutex;
mutable std::mutex mOutputMetricsMutex;
bool mOutputCompletionWorkerRunning = false;
bool mOutputCompletionWorkerStopping = false;
bool mOutputProducerWorkerRunning = false;
bool mOutputProducerWorkerStopping = false;
uint64_t mNextReadyOutputFrameIndex = 0;
uint64_t mInputFrameIndex = 0;
uint64_t mOutputFrameScheduleIndex = 0;
@@ -121,6 +139,11 @@ private:
double mOutputRenderMilliseconds = 0.0;
double mSmoothedOutputRenderMilliseconds = 0.0;
double mMaxOutputRenderMilliseconds = 0.0;
double mOutputFrameAcquireMilliseconds = 0.0;
double mOutputFrameRenderRequestMilliseconds = 0.0;
double mOutputFrameEndAccessMilliseconds = 0.0;
uint64_t mLastLateStreak = 0;
uint64_t mLastDropStreak = 0;
uint64_t mLateFrameCount = 0;
uint64_t mDroppedFrameCount = 0;
uint64_t mFlushedFrameCount = 0;

View File

@@ -30,5 +30,8 @@ inline VideoPlayoutPolicy NormalizeVideoPlayoutPolicy(VideoPlayoutPolicy policy)
policy.targetReadyFrames = 1;
if (policy.maxReadyFrames < policy.targetReadyFrames)
policy.maxReadyFrames = policy.targetReadyFrames;
const unsigned minimumOutputFramePoolSize = policy.targetPrerollFrames + policy.maxReadyFrames + policy.minimumSpareDeviceFrames;
if (policy.outputFramePoolSize < minimumOutputFramePoolSize)
policy.outputFramePoolSize = minimumOutputFramePoolSize;
return policy;
}

View File

@@ -423,7 +423,6 @@ bool DeckLinkSession::AcquireNextOutputVideoFrame(CComPtr<IDeckLinkMutableVideoF
return false;
outputVideoFrame = outputVideoFrameQueue.front();
outputVideoFrameQueue.push_back(outputVideoFrame);
outputVideoFrameQueue.pop_front();
return outputVideoFrame != nullptr;
}
@@ -448,6 +447,7 @@ bool DeckLinkSession::PopulateOutputFrame(IDeckLinkMutableVideoFrame* outputVide
frame.width = mState.outputFrameSize.width;
frame.height = mState.outputFrameSize.height;
frame.pixelFormat = mState.outputPixelFormat;
outputVideoFrame->AddRef();
frame.nativeFrame = outputVideoFrame;
frame.nativeBuffer = outputVideoFrameBuffer.Detach();
return true;
@@ -506,7 +506,10 @@ VideoPlayoutRecoveryDecision DeckLinkSession::AccountForCompletionResult(VideoIO
bool DeckLinkSession::ScheduleOutputFrame(const VideoIOOutputFrame& frame)
{
IDeckLinkMutableVideoFrame* outputVideoFrame = static_cast<IDeckLinkMutableVideoFrame*>(frame.nativeFrame);
return ScheduleFrame(outputVideoFrame);
const bool scheduled = ScheduleFrame(outputVideoFrame);
if (outputVideoFrame != nullptr)
outputVideoFrame->Release();
return scheduled;
}
bool DeckLinkSession::Start()
@@ -616,8 +619,18 @@ void DeckLinkSession::HandleVideoInputFrame(IDeckLinkVideoInputFrame* inputFrame
inputFrameBuffer->EndAccess(bmdBufferAccessRead);
}
void DeckLinkSession::HandlePlayoutFrameCompleted(IDeckLinkVideoFrame*, BMDOutputFrameCompletionResult completionResult)
void DeckLinkSession::HandlePlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, BMDOutputFrameCompletionResult completionResult)
{
if (completedFrame != nullptr)
{
CComPtr<IDeckLinkMutableVideoFrame> reusableFrame;
if (completedFrame->QueryInterface(IID_IDeckLinkMutableVideoFrame, reinterpret_cast<void**>(&reusableFrame)) == S_OK &&
reusableFrame != nullptr)
{
outputVideoFrameQueue.push_back(reusableFrame);
}
}
if (!mOutputFrameCallback)
return;