Render timing

This commit is contained in:
Aiden
2026-05-12 22:18:27 +10:00
parent 13586c611a
commit 4a049a557a
15 changed files with 260 additions and 24 deletions

View File

@@ -35,7 +35,7 @@ DeckLinkOutputThread
never renders
```
Startup warms up real rendered frames before DeckLink scheduled playback starts. When DeckLink input is available, startup also waits briefly for three ready input frames before the render thread starts so the first render ticks are deliberate rather than lucky.
Startup builds one settled output reserve before DeckLink scheduled playback starts: the completed-frame reserve must reach the configured depth and remain ready for the configured settle window. When DeckLink input is available, startup also waits briefly for three ready input frames before the render thread starts so the first render ticks are deliberate rather than lucky.
## Current Scope
@@ -53,8 +53,7 @@ Included now:
- render-thread-owned input texture upload
- async PBO readback
- latest-N system-memory frame exchange
- rendered-frame warmup
- eight completed output warmup frames before DeckLink preroll, with DeckLink scheduled depth still targeted at four
- settled completed-frame output reserve before DeckLink preroll, with DeckLink scheduled depth still targeted at four
- background Slang compile of `shaders/happy-accident`
- app-owned display/render layer model for shader build readiness
- app-owned submission of a completed shader artifact
@@ -239,7 +238,7 @@ DeckLink output is an optional edge service in this app.
Startup order is:
1. start render thread
2. warm up rendered system-memory frames
2. build a settled completed-frame output reserve at normal render cadence
3. try to attach DeckLink output
4. start telemetry and HTTP either way
@@ -282,6 +281,13 @@ Input telemetry:
- `inputFramesReceived`: frames accepted into `InputFrameMailbox`
- `inputFramesDropped`: ready input frames dropped or missed because the mailbox was full
- `renderFrameMs`: most recent render-thread draw duration, excluding completed-readback copy and readback queue work
- `renderFrameBudgetUsedPercent`: most recent render draw time as a percentage of the selected frame budget
- `renderFrameMaxMs`: maximum observed render-thread draw duration for this process
- `readbackQueueMs`: time spent queueing the most recent async BGRA8 PBO readback
- `completedReadbackCopyMs`: time spent mapping/copying the most recent completed readback into system-memory frame storage
- `completedDrops`: completed unscheduled system-memory frames dropped so render could reuse the slot
- `acquireMisses`: times render/readback could not acquire a writable system-memory frame slot
- `inputConsumeMisses`: render ticks where no ready input frame was available to upload
- `inputUploadMisses`: input texture upload attempts that reused the previous GL input texture
- `inputReadyFrames`: ready input frames currently queued in `InputFrameMailbox`

View File

@@ -87,29 +87,13 @@ public:
}
mRuntimeLayers.StartStartupBuild(mConfig.runtimeShaderId);
Log("app", "Waiting for rendered warmup frames.");
if (!mFrameExchange.WaitForCompletedDepth(mConfig.warmupCompletedFrames, mConfig.warmupTimeout))
if (!BuildSettledOutputReserve(error))
{
error = "Timed out waiting for rendered warmup frames.";
LogError("app", error);
Stop();
return false;
}
if (mConfig.startupSettle > std::chrono::milliseconds::zero())
{
Log("app", "Settling render cadence before DeckLink output for " + std::to_string(mConfig.startupSettle.count()) + " ms.");
std::this_thread::sleep_for(mConfig.startupSettle);
Log("app", "Waiting for rendered reserve after startup settle.");
if (!mFrameExchange.WaitForCompletedDepth(mConfig.warmupCompletedFrames, mConfig.warmupTimeout))
{
error = "Timed out waiting for rendered reserve after startup settle.";
LogError("app", error);
Stop();
return false;
}
}
StartOptionalVideoOutput();
mTelemetryHealth.Start(mFrameExchange, mOutput, mOutputThread, mRenderThread);
StartHttpServer();
@@ -181,6 +165,24 @@ private:
Log("app", mVideoOutputStatus);
}
bool BuildSettledOutputReserve(std::string& error)
{
const auto reserveTimeout = mConfig.warmupTimeout + mConfig.startupSettle + mConfig.warmupTimeout;
Log("app",
"Building settled output reserve: waiting for " + std::to_string(mConfig.warmupCompletedFrames) +
" completed frame(s) to remain ready for " + std::to_string(mConfig.startupSettle.count()) + " ms.");
if (mFrameExchange.WaitForStableCompletedDepth(
mConfig.warmupCompletedFrames,
mConfig.startupSettle,
reserveTimeout))
{
return true;
}
error = "Timed out waiting for settled output reserve.";
return false;
}
void DisableVideoOutput(const std::string& reason)
{
mOutputThread.Stop();

View File

@@ -284,9 +284,9 @@ inline std::string RuntimeStateToJson(const RuntimeStateJsonInput& input)
writer.Key("performance");
writer.BeginObject();
writer.KeyDouble("frameBudgetMs", FrameDurationMillisecondsFromRateString(input.config.outputFrameRate));
writer.KeyNull("renderMs");
writer.KeyDouble("renderMs", input.telemetry.renderFrameMilliseconds);
writer.KeyNull("smoothedRenderMs");
writer.KeyNull("budgetUsedPercent");
writer.KeyDouble("budgetUsedPercent", input.telemetry.renderFrameBudgetUsedPercent);
writer.KeyNull("completionIntervalMs");
writer.KeyNull("smoothedCompletionIntervalMs");
writer.KeyNull("maxCompletionIntervalMs");

View File

@@ -130,6 +130,51 @@ bool SystemFrameExchange::WaitForCompletedDepth(std::size_t targetDepth, std::ch
});
}
bool SystemFrameExchange::WaitForStableCompletedDepth(
std::size_t targetDepth,
std::chrono::milliseconds stableDuration,
std::chrono::milliseconds timeout)
{
if (targetDepth == 0)
return true;
const auto deadline = std::chrono::steady_clock::now() + timeout;
std::unique_lock<std::mutex> lock(mMutex);
bool stableWindowStarted = false;
std::chrono::steady_clock::time_point stableSince;
while (true)
{
const auto now = std::chrono::steady_clock::now();
if (now >= deadline)
return false;
if (CompletedCountLocked() >= targetDepth)
{
if (stableDuration <= std::chrono::milliseconds::zero())
return true;
if (!stableWindowStarted)
{
stableSince = now;
stableWindowStarted = true;
}
const auto stableDeadline = stableSince + stableDuration;
if (now >= stableDeadline)
return true;
mCondition.wait_until(lock, stableDeadline < deadline ? stableDeadline : deadline);
continue;
}
stableWindowStarted = false;
mCondition.wait_until(lock, deadline, [&]() {
return CompletedCountLocked() >= targetDepth;
});
}
}
void SystemFrameExchange::Clear()
{
std::lock_guard<std::mutex> lock(mMutex);

View File

@@ -22,6 +22,10 @@ public:
bool ConsumeCompletedForSchedule(SystemFrame& frame);
bool ReleaseScheduledByBytes(void* bytes);
bool WaitForCompletedDepth(std::size_t targetDepth, std::chrono::milliseconds timeout);
bool WaitForStableCompletedDepth(
std::size_t targetDepth,
std::chrono::milliseconds stableDuration,
std::chrono::milliseconds timeout);
void Clear();
SystemFrameExchangeMetrics Metrics() const;

View File

@@ -85,6 +85,11 @@ RenderThread::Metrics RenderThread::GetMetrics() const
metrics.skippedFrames = mSkippedFrames.load(std::memory_order_relaxed);
metrics.shaderBuildsCommitted = mShaderBuildsCommitted.load(std::memory_order_relaxed);
metrics.shaderBuildFailures = mShaderBuildFailures.load(std::memory_order_relaxed);
metrics.renderFrameMilliseconds = mRenderFrameMilliseconds.load(std::memory_order_relaxed);
metrics.renderFrameBudgetUsedPercent = mRenderFrameBudgetUsedPercent.load(std::memory_order_relaxed);
metrics.renderFrameMaxMilliseconds = mRenderFrameMaxMilliseconds.load(std::memory_order_relaxed);
metrics.readbackQueueMilliseconds = mReadbackQueueMilliseconds.load(std::memory_order_relaxed);
metrics.completedReadbackCopyMilliseconds = mCompletedReadbackCopyMilliseconds.load(std::memory_order_relaxed);
metrics.inputFramesReceived = mInputFramesReceived.load(std::memory_order_relaxed);
metrics.inputFramesDropped = mInputFramesDropped.load(std::memory_order_relaxed);
metrics.inputConsumeMisses = mInputConsumeMisses.load(std::memory_order_relaxed);
@@ -154,6 +159,7 @@ void RenderThread::ThreadMain()
CountAcquireMiss();
},
[this]() { CountCompleted(); });
PublishReadbackMetrics(readback);
const auto now = RenderCadenceClock::Clock::now();
const RenderCadenceClock::Tick tick = clock.Poll(now);
@@ -178,6 +184,7 @@ void RenderThread::ThreadMain()
{
mPboQueueMisses.fetch_add(1, std::memory_order_relaxed);
}
PublishReadbackMetrics(readback);
CountRendered();
++frameIndex;
@@ -196,6 +203,7 @@ void RenderThread::ThreadMain()
CountAcquireMiss();
},
[this]() { CountCompleted(); });
PublishReadbackMetrics(readback);
}
readback.Shutdown();
@@ -237,6 +245,29 @@ void RenderThread::CountAcquireMiss()
mAcquireMisses.fetch_add(1, std::memory_order_relaxed);
}
void RenderThread::PublishReadbackMetrics(const Bgra8ReadbackPipeline& readback)
{
const double renderMilliseconds = readback.LastRenderFrameMilliseconds();
mRenderFrameMilliseconds.store(renderMilliseconds, std::memory_order_relaxed);
if (mConfig.frameDurationMilliseconds > 0.0)
{
mRenderFrameBudgetUsedPercent.store(
(renderMilliseconds / mConfig.frameDurationMilliseconds) * 100.0,
std::memory_order_relaxed);
}
else
{
mRenderFrameBudgetUsedPercent.store(0.0, std::memory_order_relaxed);
}
const double previousMax = mRenderFrameMaxMilliseconds.load(std::memory_order_relaxed);
if (renderMilliseconds > previousMax)
mRenderFrameMaxMilliseconds.store(renderMilliseconds, std::memory_order_relaxed);
mReadbackQueueMilliseconds.store(readback.LastReadbackQueueMilliseconds(), std::memory_order_relaxed);
mCompletedReadbackCopyMilliseconds.store(readback.LastCompletedReadbackCopyMilliseconds(), std::memory_order_relaxed);
}
void RenderThread::PublishInputMetrics(const InputFrameTexture& inputTexture)
{
if (mInputMailbox != nullptr)

View File

@@ -16,6 +16,7 @@
class SystemFrameExchange;
class InputFrameMailbox;
class InputFrameTexture;
class Bgra8ReadbackPipeline;
class RenderThread
{
@@ -38,6 +39,11 @@ public:
uint64_t skippedFrames = 0;
uint64_t shaderBuildsCommitted = 0;
uint64_t shaderBuildFailures = 0;
double renderFrameMilliseconds = 0.0;
double renderFrameBudgetUsedPercent = 0.0;
double renderFrameMaxMilliseconds = 0.0;
double readbackQueueMilliseconds = 0.0;
double completedReadbackCopyMilliseconds = 0.0;
uint64_t inputFramesReceived = 0;
uint64_t inputFramesDropped = 0;
uint64_t inputConsumeMisses = 0;
@@ -71,6 +77,7 @@ private:
void CountRendered();
void CountCompleted();
void CountAcquireMiss();
void PublishReadbackMetrics(const Bgra8ReadbackPipeline& readback);
void PublishInputMetrics(const InputFrameTexture& inputTexture);
void TryCommitReadyRuntimeShader(RuntimeRenderScene& runtimeRenderScene);
bool TryTakePendingRuntimeShaderArtifact(RuntimeShaderArtifact& artifact);
@@ -96,6 +103,11 @@ private:
std::atomic<uint64_t> mSkippedFrames{ 0 };
std::atomic<uint64_t> mShaderBuildsCommitted{ 0 };
std::atomic<uint64_t> mShaderBuildFailures{ 0 };
std::atomic<double> mRenderFrameMilliseconds{ 0.0 };
std::atomic<double> mRenderFrameBudgetUsedPercent{ 0.0 };
std::atomic<double> mRenderFrameMaxMilliseconds{ 0.0 };
std::atomic<double> mReadbackQueueMilliseconds{ 0.0 };
std::atomic<double> mCompletedReadbackCopyMilliseconds{ 0.0 };
std::atomic<uint64_t> mInputFramesReceived{ 0 };
std::atomic<uint64_t> mInputFramesDropped{ 0 };
std::atomic<uint64_t> mInputConsumeMisses{ 0 };

View File

@@ -2,8 +2,18 @@
#include "../frames/SystemFrameTypes.h"
#include <chrono>
#include <cstring>
namespace
{
double MillisecondsSince(std::chrono::steady_clock::time_point start)
{
return std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(
std::chrono::steady_clock::now() - start).count();
}
}
Bgra8ReadbackPipeline::~Bgra8ReadbackPipeline()
{
Shutdown();
@@ -50,10 +60,15 @@ bool Bgra8ReadbackPipeline::RenderAndQueue(uint64_t frameIndex, const RenderCall
return false;
glBindFramebuffer(GL_FRAMEBUFFER, mFramebuffer);
const auto renderStart = std::chrono::steady_clock::now();
renderFrame(frameIndex);
mLastRenderFrameMilliseconds = MillisecondsSince(renderStart);
glBindFramebuffer(GL_FRAMEBUFFER, 0);
return mPboRing.QueueReadback(mFramebuffer, mWidth, mHeight, frameIndex);
const auto queueStart = std::chrono::steady_clock::now();
const bool queued = mPboRing.QueueReadback(mFramebuffer, mWidth, mHeight, frameIndex);
mLastReadbackQueueMilliseconds = MillisecondsSince(queueStart);
return queued;
}
void Bgra8ReadbackPipeline::ConsumeCompleted(
@@ -68,12 +83,14 @@ void Bgra8ReadbackPipeline::ConsumeCompleted(
PboReadbackRing::CompletedReadback readback;
while (mPboRing.TryAcquireCompleted(readback))
{
const auto copyStart = std::chrono::steady_clock::now();
glBindBuffer(GL_PIXEL_PACK_BUFFER, readback.pbo);
void* mapped = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
if (!mapped)
{
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
mPboRing.ReleaseCompleted(readback);
mLastCompletedReadbackCopyMilliseconds = MillisecondsSince(copyStart);
continue;
}
@@ -99,6 +116,7 @@ void Bgra8ReadbackPipeline::ConsumeCompleted(
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
mPboRing.ReleaseCompleted(readback);
mLastCompletedReadbackCopyMilliseconds = MillisecondsSince(copyStart);
}
}

View File

@@ -38,6 +38,9 @@ public:
unsigned RowBytes() const { return mRowBytes; }
VideoIOPixelFormat PixelFormat() const { return VideoIOPixelFormat::Bgra8; }
uint64_t PboQueueMisses() const { return mPboRing.QueueMisses(); }
double LastRenderFrameMilliseconds() const { return mLastRenderFrameMilliseconds; }
double LastReadbackQueueMilliseconds() const { return mLastReadbackQueueMilliseconds; }
double LastCompletedReadbackCopyMilliseconds() const { return mLastCompletedReadbackCopyMilliseconds; }
private:
bool CreateRenderTarget();
@@ -48,5 +51,8 @@ private:
unsigned mRowBytes = 0;
GLuint mFramebuffer = 0;
GLuint mTexture = 0;
double mLastRenderFrameMilliseconds = 0.0;
double mLastReadbackQueueMilliseconds = 0.0;
double mLastCompletedReadbackCopyMilliseconds = 0.0;
PboReadbackRing mPboRing;
};

View File

@@ -19,6 +19,8 @@ struct CadenceTelemetrySnapshot
uint64_t scheduledTotal = 0;
uint64_t completedPollMisses = 0;
uint64_t scheduleFailures = 0;
uint64_t completedDrops = 0;
uint64_t acquireMisses = 0;
uint64_t completions = 0;
uint64_t displayedLate = 0;
uint64_t dropped = 0;
@@ -26,6 +28,11 @@ struct CadenceTelemetrySnapshot
uint64_t clockSkippedFrames = 0;
uint64_t shaderBuildsCommitted = 0;
uint64_t shaderBuildFailures = 0;
double renderFrameMilliseconds = 0.0;
double renderFrameBudgetUsedPercent = 0.0;
double renderFrameMaxMilliseconds = 0.0;
double readbackQueueMilliseconds = 0.0;
double completedReadbackCopyMilliseconds = 0.0;
uint64_t inputFramesReceived = 0;
uint64_t inputFramesDropped = 0;
uint64_t inputConsumeMisses = 0;
@@ -77,6 +84,8 @@ public:
snapshot.scheduleFailures = outputMetrics.scheduleFailures > threadMetrics.scheduleFailures
? outputMetrics.scheduleFailures
: threadMetrics.scheduleFailures;
snapshot.completedDrops = exchangeMetrics.completedDrops;
snapshot.acquireMisses = exchangeMetrics.acquireMisses;
snapshot.completions = outputMetrics.completions;
snapshot.displayedLate = outputMetrics.displayedLate;
snapshot.dropped = outputMetrics.dropped;
@@ -110,6 +119,11 @@ public:
snapshot.clockSkippedFrames = renderMetrics.skippedFrames;
snapshot.shaderBuildsCommitted = renderMetrics.shaderBuildsCommitted;
snapshot.shaderBuildFailures = renderMetrics.shaderBuildFailures;
snapshot.renderFrameMilliseconds = renderMetrics.renderFrameMilliseconds;
snapshot.renderFrameBudgetUsedPercent = renderMetrics.renderFrameBudgetUsedPercent;
snapshot.renderFrameMaxMilliseconds = renderMetrics.renderFrameMaxMilliseconds;
snapshot.readbackQueueMilliseconds = renderMetrics.readbackQueueMilliseconds;
snapshot.completedReadbackCopyMilliseconds = renderMetrics.completedReadbackCopyMilliseconds;
snapshot.inputFramesReceived = renderMetrics.inputFramesReceived;
snapshot.inputFramesDropped = renderMetrics.inputFramesDropped;
snapshot.inputConsumeMisses = renderMetrics.inputConsumeMisses;

View File

@@ -21,6 +21,8 @@ inline void WriteCadenceTelemetryJson(JsonWriter& writer, const CadenceTelemetry
writer.KeyUInt("scheduledTotal", snapshot.scheduledTotal);
writer.KeyUInt("completedPollMisses", snapshot.completedPollMisses);
writer.KeyUInt("scheduleFailures", snapshot.scheduleFailures);
writer.KeyUInt("completedDrops", snapshot.completedDrops);
writer.KeyUInt("acquireMisses", snapshot.acquireMisses);
writer.KeyUInt("completions", snapshot.completions);
writer.KeyUInt("late", snapshot.displayedLate);
writer.KeyUInt("dropped", snapshot.dropped);
@@ -30,6 +32,11 @@ inline void WriteCadenceTelemetryJson(JsonWriter& writer, const CadenceTelemetry
writer.KeyUInt("clockSkipped", snapshot.clockSkippedFrames);
writer.KeyUInt("shaderCommitted", snapshot.shaderBuildsCommitted);
writer.KeyUInt("shaderFailures", snapshot.shaderBuildFailures);
writer.KeyDouble("renderFrameMs", snapshot.renderFrameMilliseconds);
writer.KeyDouble("renderFrameBudgetUsedPercent", snapshot.renderFrameBudgetUsedPercent);
writer.KeyDouble("renderFrameMaxMs", snapshot.renderFrameMaxMilliseconds);
writer.KeyDouble("readbackQueueMs", snapshot.readbackQueueMilliseconds);
writer.KeyDouble("completedReadbackCopyMs", snapshot.completedReadbackCopyMilliseconds);
writer.KeyUInt("inputFramesReceived", snapshot.inputFramesReceived);
writer.KeyUInt("inputFramesDropped", snapshot.inputFramesDropped);
writer.KeyUInt("inputConsumeMisses", snapshot.inputConsumeMisses);