Performance chasing
All checks were successful
CI / React UI Build (push) Successful in 10s
CI / Native Windows Build And Tests (push) Successful in 2m51s
CI / Windows Release Package (push) Successful in 2m55s

This commit is contained in:
Aiden
2026-05-11 23:10:45 +10:00
parent c5cead6003
commit a434a88108
18 changed files with 1115 additions and 82 deletions

View File

@@ -9,12 +9,14 @@
#include <algorithm>
#include <chrono>
#include <cstring>
#include <cmath>
#include <windows.h>
VideoBackend::VideoBackend(RenderEngine& renderEngine, HealthTelemetry& healthTelemetry, RuntimeEventDispatcher& runtimeEventDispatcher) :
mHealthTelemetry(healthTelemetry),
mRuntimeEventDispatcher(runtimeEventDispatcher),
mPlayoutPolicy(NormalizeVideoPlayoutPolicy(VideoPlayoutPolicy())),
mOutputProductionController(mPlayoutPolicy),
mReadyOutputQueue(mPlayoutPolicy),
mVideoIODevice(std::make_unique<DeckLinkSession>()),
mBridge(std::make_unique<OpenGLVideoIOBridge>(renderEngine))
@@ -80,6 +82,7 @@ bool VideoBackend::ConfigureInput(const VideoFormat& inputVideoMode, std::string
bool VideoBackend::ConfigureOutput(const VideoFormat& outputVideoMode, bool externalKeyingEnabled, std::string& error)
{
mPlayoutPolicy = NormalizeVideoPlayoutPolicy(mPlayoutPolicy);
mOutputProductionController.Configure(mPlayoutPolicy);
mReadyOutputQueue.Configure(mPlayoutPolicy);
if (mLifecycle.State() != VideoBackendLifecycleState::Configuring)
ApplyLifecycleTransition(VideoBackendLifecycleState::Configuring, "Configuring video backend output.");
@@ -101,7 +104,10 @@ bool VideoBackend::Start()
StartOutputCompletionWorker();
const bool started = mVideoIODevice->Start();
if (started)
{
StartOutputProducerWorker();
ApplyLifecycleTransition(VideoBackendLifecycleState::Running, "Video backend started.");
}
else
{
StopOutputCompletionWorker();
@@ -113,6 +119,7 @@ bool VideoBackend::Start()
bool VideoBackend::Stop()
{
ApplyLifecycleTransition(VideoBackendLifecycleState::Stopping, "Video backend stopping.");
StopOutputProducerWorker();
const bool stopped = mVideoIODevice->Stop();
StopOutputCompletionWorker();
if (stopped)
@@ -232,6 +239,12 @@ const std::string& VideoBackend::StatusMessage() const
return mVideoIODevice->StatusMessage();
}
bool VideoBackend::ShouldPrioritizeOutputOverPreview() const
{
const RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
return metrics.depth < static_cast<std::size_t>(mPlayoutPolicy.targetReadyFrames);
}
void VideoBackend::SetStatusMessage(const std::string& message)
{
mVideoIODevice->SetStatusMessage(message);
@@ -288,27 +301,36 @@ void VideoBackend::HandleOutputFrameCompletion(const VideoIOCompletion& completi
// Starts the output-completion worker thread if it is not already running.
//
// While holding mOutputCompletionMutex this discards pending completions and
// queued ready frames, resets the ready-queue depth statistics, the render
// timing metrics and the late/drop streaks, and then launches
// OutputCompletionWorkerMain on mOutputCompletionWorker. Calling it while the
// worker is already running is a no-op.
void VideoBackend::StartOutputCompletionWorker()
{
    // The mutex is taken exactly once: std::mutex is non-recursive, so a second
    // lock_guard on the same thread would be undefined behaviour.
    std::lock_guard<std::mutex> lock(mOutputCompletionMutex);
    if (mOutputCompletionWorkerRunning)
        return;

    // Drop any state left over from a previous run.
    mPendingOutputCompletions.clear();
    mReadyOutputQueue.Clear();
    mNextReadyOutputFrameIndex = 0;

    // Ready-queue depth statistics.
    mHasReadyQueueDepthBaseline = false;
    mMinReadyQueueDepth = 0;
    mMaxReadyQueueDepth = 0;
    mReadyQueueZeroDepthCount = 0;

    // Render timing metrics, including the per-phase breakdown.
    mOutputRenderMilliseconds = 0.0;
    mSmoothedOutputRenderMilliseconds = 0.0;
    mMaxOutputRenderMilliseconds = 0.0;
    mOutputFrameAcquireMilliseconds = 0.0;
    mOutputFrameRenderRequestMilliseconds = 0.0;
    mOutputFrameEndAccessMilliseconds = 0.0;

    // Late/drop streaks reported by the last completion.
    mLastLateStreak = 0;
    mLastDropStreak = 0;

    // Launch the worker exactly once; assigning to an already-joinable
    // std::thread would call std::terminate.
    mOutputCompletionWorkerStopping = false;
    mOutputCompletionWorkerRunning = true;
    mOutputCompletionWorker = std::thread(&VideoBackend::OutputCompletionWorkerMain, this);
}
void VideoBackend::StopOutputCompletionWorker()
{
StopOutputProducerWorker();
bool shouldJoin = false;
{
std::lock_guard<std::mutex> lock(mOutputCompletionMutex);
@@ -322,6 +344,40 @@ void VideoBackend::StopOutputCompletionWorker()
mOutputCompletionWorker.join();
}
void VideoBackend::StartOutputProducerWorker()
{
std::lock_guard<std::mutex> lock(mOutputProducerMutex);
if (mOutputProducerWorkerRunning)
return;
mLastOutputProductionCompletion = VideoIOCompletion();
mLastOutputProductionTime = std::chrono::steady_clock::time_point();
mOutputProducerWorkerStopping = false;
mOutputProducerWorkerRunning = true;
mOutputProducerWorker = std::thread(&VideoBackend::OutputProducerWorkerMain, this);
mOutputProducerCondition.notify_one();
}
// Requests the output-producer worker to stop and waits for it to finish.
// The stopping flag is only raised when the worker is marked as running; the
// thread is joined whenever it is joinable, after signalling the condition
// variable outside the lock.
void VideoBackend::StopOutputProducerWorker()
{
    bool workerJoinable = false;
    {
        std::lock_guard<std::mutex> guard(mOutputProducerMutex);
        if (mOutputProducerWorkerRunning)
        {
            mOutputProducerWorkerStopping = true;
        }
        workerJoinable = mOutputProducerWorker.joinable();
    }

    // Wake the worker so it can observe the stop request.
    mOutputProducerCondition.notify_one();

    if (!workerJoinable)
        return;
    mOutputProducerWorker.join();
}
// Signals mOutputProducerCondition once. The producer worker waits on this
// condition with a timeout, so a signal that arrives while it is not waiting
// is simply dropped and the worker wakes on its next timeout instead.
void VideoBackend::NotifyOutputProducer()
{
    // No lock is taken here; this only pokes the condition variable.
    mOutputProducerCondition.notify_one();
}
void VideoBackend::OutputCompletionWorkerMain()
{
for (;;)
@@ -351,6 +407,57 @@ void VideoBackend::OutputCompletionWorkerMain()
}
}
// Thread entry point for the output-producer worker.
//
// Each iteration waits on mOutputProducerCondition for at most
// OutputProducerWakeInterval(), samples the ready-queue metrics, asks
// mOutputProductionController whether a frame should be produced and, if so,
// produces at most one ready output frame using the most recently recorded
// completion. Production is rate-limited so that two successful productions
// are at least one wake interval apart. The loop exits, clearing
// mOutputProducerWorkerRunning, as soon as mOutputProducerWorkerStopping is
// observed under mOutputProducerMutex.
void VideoBackend::OutputProducerWorkerMain()
{
    for (;;)
    {
        {
            std::unique_lock<std::mutex> lock(mOutputProducerMutex);
            // Only wait when no stop is pending. A stop request whose
            // notify_one() fired while this thread was busy producing would
            // otherwise go unnoticed until a full wake interval had elapsed,
            // stalling the join in StopOutputProducerWorker().
            if (!mOutputProducerWorkerStopping)
                mOutputProducerCondition.wait_for(lock, OutputProducerWakeInterval());
            if (mOutputProducerWorkerStopping)
            {
                mOutputProducerWorkerRunning = false;
                return;
            }
        }

        const RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
        RecordReadyQueueDepthSample(metrics);
        const OutputProductionDecision decision = mOutputProductionController.Decide(BuildOutputProductionPressure(metrics));
        if (decision.action != OutputProductionAction::Produce || decision.requestedFrames == 0)
            continue;

        VideoIOCompletion completion;
        {
            std::lock_guard<std::mutex> lock(mOutputProducerMutex);
            // Exit right away on a stop request instead of looping back into
            // another timed wait first.
            if (mOutputProducerWorkerStopping)
            {
                mOutputProducerWorkerRunning = false;
                return;
            }
            completion = mLastOutputProductionCompletion;
        }

        // Rate limit: skip this wake-up if the previous successful production
        // happened less than one wake interval ago. A default-constructed
        // time point means nothing has been produced yet.
        const auto now = std::chrono::steady_clock::now();
        if (mLastOutputProductionTime != std::chrono::steady_clock::time_point() &&
            now - mLastOutputProductionTime < OutputProducerWakeInterval())
        {
            continue;
        }

        // Produce a single frame per wake-up regardless of
        // decision.requestedFrames.
        if (ProduceReadyOutputFrames(completion, 1) > 0)
            mLastOutputProductionTime = std::chrono::steady_clock::now();
    }
}
// Computes how long the producer worker waits between wake-ups.
// Returns 8 ms when State().frameBudgetMilliseconds is not positive; otherwise
// 75% of the frame budget, rounded down to whole milliseconds and never less
// than 1 ms.
std::chrono::milliseconds VideoBackend::OutputProducerWakeInterval() const
{
    using Milliseconds = std::chrono::milliseconds;

    const double budgetMs = State().frameBudgetMilliseconds;
    if (budgetMs <= 0.0)
        return Milliseconds(8);

    const int scaledMs = static_cast<int>(std::floor(budgetMs * 0.75));
    // Clamp to a minimum of one millisecond.
    return Milliseconds(scaledMs < 1 ? 1 : scaledMs);
}
void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& completion)
{
RecordFramePacing(completion.result);
@@ -358,31 +465,69 @@ void VideoBackend::ProcessOutputFrameCompletion(const VideoIOCompletion& complet
const RenderOutputQueueMetrics initialQueueMetrics = mReadyOutputQueue.GetMetrics();
RecordReadyQueueDepthSample(initialQueueMetrics);
const VideoPlayoutRecoveryDecision recoveryDecision = AccountForCompletionResult(completion.result, initialQueueMetrics.depth);
{
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
mLastLateStreak = recoveryDecision.lateStreak;
mLastDropStreak = recoveryDecision.dropStreak;
}
{
std::lock_guard<std::mutex> lock(mOutputProducerMutex);
mLastOutputProductionCompletion = completion;
}
NotifyOutputProducer();
FillReadyOutputQueue(completion);
if (!ScheduleReadyOutputFrame())
if (!ScheduleReadyOutputFrame() &&
(ProduceReadyOutputFrames(completion, 1) == 0 || !ScheduleReadyOutputFrame()))
{
ScheduleBlackUnderrunFrame();
}
NotifyOutputProducer();
RecordBackendPlayoutHealth(completion.result, recoveryDecision);
}
void VideoBackend::RecordBackendPlayoutHealth(VideoIOCompletionResult result, const VideoPlayoutRecoveryDecision& recoveryDecision)
{
const RenderOutputQueueMetrics queueMetrics = mReadyOutputQueue.GetMetrics();
std::size_t minReadyQueueDepth = 0;
std::size_t maxReadyQueueDepth = 0;
uint64_t readyQueueZeroDepthCount = 0;
double outputRenderMilliseconds = 0.0;
double smoothedOutputRenderMilliseconds = 0.0;
double maxOutputRenderMilliseconds = 0.0;
double outputFrameAcquireMilliseconds = 0.0;
double outputFrameRenderRequestMilliseconds = 0.0;
double outputFrameEndAccessMilliseconds = 0.0;
{
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
minReadyQueueDepth = mMinReadyQueueDepth;
maxReadyQueueDepth = mMaxReadyQueueDepth;
readyQueueZeroDepthCount = mReadyQueueZeroDepthCount;
outputRenderMilliseconds = mOutputRenderMilliseconds;
smoothedOutputRenderMilliseconds = mSmoothedOutputRenderMilliseconds;
maxOutputRenderMilliseconds = mMaxOutputRenderMilliseconds;
outputFrameAcquireMilliseconds = mOutputFrameAcquireMilliseconds;
outputFrameRenderRequestMilliseconds = mOutputFrameRenderRequestMilliseconds;
outputFrameEndAccessMilliseconds = mOutputFrameEndAccessMilliseconds;
}
mHealthTelemetry.TryRecordBackendPlayoutHealth(
VideoBackendLifecycle::StateName(mLifecycle.State()),
CompletionResultName(result),
queueMetrics.depth,
queueMetrics.capacity,
queueMetrics.pushedCount,
mMinReadyQueueDepth,
mMaxReadyQueueDepth,
mReadyQueueZeroDepthCount,
minReadyQueueDepth,
maxReadyQueueDepth,
readyQueueZeroDepthCount,
queueMetrics.poppedCount,
queueMetrics.droppedCount,
queueMetrics.underrunCount,
mOutputRenderMilliseconds,
mSmoothedOutputRenderMilliseconds,
mMaxOutputRenderMilliseconds,
outputRenderMilliseconds,
smoothedOutputRenderMilliseconds,
maxOutputRenderMilliseconds,
outputFrameAcquireMilliseconds,
outputFrameRenderRequestMilliseconds,
outputFrameEndAccessMilliseconds,
recoveryDecision.completedFrameIndex,
recoveryDecision.scheduledFrameIndex,
recoveryDecision.scheduledLeadFrames,
@@ -397,50 +542,93 @@ void VideoBackend::RecordBackendPlayoutHealth(VideoIOCompletionResult result, co
StatusMessage());
}
bool VideoBackend::FillReadyOutputQueue(const VideoIOCompletion& completion)
std::size_t VideoBackend::ProduceReadyOutputFrames(const VideoIOCompletion& completion, std::size_t maxFrames)
{
if (maxFrames == 0)
return 0;
std::lock_guard<std::mutex> productionLock(mOutputProductionMutex);
RenderOutputQueueMetrics metrics = mReadyOutputQueue.GetMetrics();
bool filledAny = false;
while (metrics.depth < mPlayoutPolicy.targetReadyFrames)
std::size_t producedFrames = 0;
while (producedFrames < maxFrames)
{
const OutputProductionDecision decision = mOutputProductionController.Decide(BuildOutputProductionPressure(metrics));
if (decision.action != OutputProductionAction::Produce)
break;
if (!RenderReadyOutputFrame(mVideoIODevice->State(), completion))
return filledAny;
filledAny = true;
break;
++producedFrames;
metrics = mReadyOutputQueue.GetMetrics();
RecordReadyQueueDepthSample(metrics);
}
return true;
return producedFrames;
}
// Assembles the pressure snapshot handed to the output production controller:
// depth, capacity and underrun count come from the supplied queue metrics,
// while the late/drop streaks are copied from the backend's last recorded
// values under mOutputMetricsMutex.
OutputProductionPressure VideoBackend::BuildOutputProductionPressure(const RenderOutputQueueMetrics& metrics) const
{
    OutputProductionPressure snapshot;

    {
        // Streaks are shared state; read them together under the metrics lock.
        std::lock_guard<std::mutex> guard(mOutputMetricsMutex);
        snapshot.lateStreak = mLastLateStreak;
        snapshot.dropStreak = mLastDropStreak;
    }

    // Queue figures come straight from the caller's metrics snapshot.
    snapshot.readyQueueDepth = metrics.depth;
    snapshot.readyQueueCapacity = metrics.capacity;
    snapshot.readyQueueUnderrunCount = metrics.underrunCount;

    return snapshot;
}
bool VideoBackend::RenderReadyOutputFrame(const VideoIOState& state, const VideoIOCompletion& completion)
{
const auto renderStart = std::chrono::steady_clock::now();
VideoIOOutputFrame outputFrame;
const auto acquireStart = std::chrono::steady_clock::now();
if (!BeginOutputFrame(outputFrame))
return false;
const auto acquireEnd = std::chrono::steady_clock::now();
bool rendered = true;
const auto renderRequestStart = std::chrono::steady_clock::now();
if (mBridge)
rendered = mBridge->RenderScheduledFrame(state, completion, outputFrame);
const auto renderRequestEnd = std::chrono::steady_clock::now();
const auto endAccessStart = std::chrono::steady_clock::now();
EndOutputFrame(outputFrame);
const auto endAccessEnd = std::chrono::steady_clock::now();
const double acquireMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(acquireEnd - acquireStart).count();
const double renderRequestMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(renderRequestEnd - renderRequestStart).count();
const double endAccessMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(endAccessEnd - endAccessStart).count();
if (!rendered)
{
ApplyLifecycleTransition(VideoBackendLifecycleState::Degraded, "Output frame render request failed; skipping schedule for this frame.");
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
std::chrono::steady_clock::now() - renderStart).count();
RecordOutputRenderDuration(renderMilliseconds);
RecordOutputRenderDuration(renderMilliseconds, acquireMilliseconds, renderRequestMilliseconds, endAccessMilliseconds);
if (outputFrame.nativeFrame != nullptr)
{
static_cast<IUnknown*>(outputFrame.nativeFrame)->Release();
outputFrame.nativeFrame = nullptr;
}
return false;
}
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
std::chrono::steady_clock::now() - renderStart).count();
RecordOutputRenderDuration(renderMilliseconds);
RecordOutputRenderDuration(renderMilliseconds, acquireMilliseconds, renderRequestMilliseconds, endAccessMilliseconds);
RenderOutputFrame readyFrame;
readyFrame.frame = outputFrame;
readyFrame.frameIndex = ++mNextReadyOutputFrameIndex;
return mReadyOutputQueue.Push(readyFrame);
const bool pushed = mReadyOutputQueue.Push(readyFrame);
if (!pushed && outputFrame.nativeFrame != nullptr)
{
static_cast<IUnknown*>(outputFrame.nativeFrame)->Release();
outputFrame.nativeFrame = nullptr;
}
return pushed;
}
bool VideoBackend::ScheduleReadyOutputFrame()
@@ -516,6 +704,7 @@ void VideoBackend::RecordFramePacing(VideoIOCompletionResult completionResult)
void VideoBackend::RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& metrics)
{
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
if (!mHasReadyQueueDepthBaseline)
{
mHasReadyQueueDepthBaseline = true;
@@ -532,14 +721,18 @@ void VideoBackend::RecordReadyQueueDepthSample(const RenderOutputQueueMetrics& m
++mReadyQueueZeroDepthCount;
}
void VideoBackend::RecordOutputRenderDuration(double renderMilliseconds)
void VideoBackend::RecordOutputRenderDuration(double renderMilliseconds, double acquireMilliseconds, double renderRequestMilliseconds, double endAccessMilliseconds)
{
std::lock_guard<std::mutex> lock(mOutputMetricsMutex);
mOutputRenderMilliseconds = (std::max)(renderMilliseconds, 0.0);
if (mSmoothedOutputRenderMilliseconds <= 0.0)
mSmoothedOutputRenderMilliseconds = mOutputRenderMilliseconds;
else
mSmoothedOutputRenderMilliseconds = mSmoothedOutputRenderMilliseconds * 0.9 + mOutputRenderMilliseconds * 0.1;
mMaxOutputRenderMilliseconds = (std::max)(mMaxOutputRenderMilliseconds, mOutputRenderMilliseconds);
mOutputFrameAcquireMilliseconds = (std::max)(acquireMilliseconds, 0.0);
mOutputFrameRenderRequestMilliseconds = (std::max)(renderRequestMilliseconds, 0.0);
mOutputFrameEndAccessMilliseconds = (std::max)(endAccessMilliseconds, 0.0);
PublishTimingSample("VideoBackend", "outputRender", mOutputRenderMilliseconds, "ms");
PublishTimingSample("VideoBackend", "smoothedOutputRender", mSmoothedOutputRenderMilliseconds, "ms");