From 709d3d3fa4791ad7c2603a418ad4d64b1dd4cd64 Mon Sep 17 00:00:00 2001 From: Aiden <68633820+awils27@users.noreply.github.com> Date: Tue, 12 May 2026 01:30:30 +1000 Subject: [PATCH] Test works --- .vscode/launch.json | 17 + .vscode/tasks.json | 16 + CMakeLists.txt | 44 + .../DeckLinkRenderCadenceProbe.cpp | 920 ++++++++++++++++++ apps/DeckLinkRenderCadenceProbe/README.md | 113 +++ 5 files changed, 1110 insertions(+) create mode 100644 apps/DeckLinkRenderCadenceProbe/DeckLinkRenderCadenceProbe.cpp create mode 100644 apps/DeckLinkRenderCadenceProbe/README.md diff --git a/.vscode/launch.json b/.vscode/launch.json index 706a9ad..6666694 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -61,6 +61,23 @@ "moduleLoad": true }, "preLaunchTask": "Build LoopThroughWithOpenGLCompositing Debug x64" + }, + { + "name": "Debug DeckLinkRenderCadenceProbe", + "type": "cppvsdbg", + "request": "launch", + "program": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug\\DeckLinkRenderCadenceProbe.exe", + "args": [], + "stopAtEntry": false, + "cwd": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug", + "environment": [], + "console": "externalTerminal", + "symbolSearchPath": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug", + "requireExactSource": true, + "logging": { + "moduleLoad": true + }, + "preLaunchTask": "Build DeckLinkRenderCadenceProbe Debug x64" } ] } diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 5ecd7e6..61b2a1e 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -36,6 +36,22 @@ "group": "build", "problemMatcher": "$msCompile" }, + { + "label": "Build DeckLinkRenderCadenceProbe Debug x64", + "type": "process", + "command": "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\Common7\\IDE\\CommonExtensions\\Microsoft\\CMake\\CMake\\bin\\cmake.exe", + "args": [ + "--build", + "${workspaceFolder}\\build\\vs2022-x64-debug", + "--config", + "Debug", + "--target", + "DeckLinkRenderCadenceProbe", + "--parallel" + ], + "group": "build", 
+ "problemMatcher": "$msCompile" + }, { "label": "Clean LoopThroughWithOpenGLCompositing Debug x64", "type": "process", diff --git a/CMakeLists.txt b/CMakeLists.txt index 9655354..6cb250c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -229,6 +229,50 @@ if(MSVC) target_compile_options(LoopThroughWithOpenGLCompositing PRIVATE /W3) endif() +set(PROBE_APP_DIR "${CMAKE_CURRENT_SOURCE_DIR}/apps/DeckLinkRenderCadenceProbe") + +add_executable(DeckLinkRenderCadenceProbe + "${APP_DIR}/videoio/decklink/DeckLinkAPI_i.c" + "${APP_DIR}/videoio/decklink/DeckLinkDisplayMode.cpp" + "${APP_DIR}/videoio/decklink/DeckLinkDisplayMode.h" + "${APP_DIR}/videoio/decklink/DeckLinkFrameTransfer.cpp" + "${APP_DIR}/videoio/decklink/DeckLinkFrameTransfer.h" + "${APP_DIR}/videoio/decklink/DeckLinkSession.cpp" + "${APP_DIR}/videoio/decklink/DeckLinkSession.h" + "${APP_DIR}/videoio/decklink/DeckLinkVideoIOFormat.cpp" + "${APP_DIR}/videoio/decklink/DeckLinkVideoIOFormat.h" + "${APP_DIR}/gl/renderer/GLExtensions.cpp" + "${APP_DIR}/gl/renderer/GLExtensions.h" + "${APP_DIR}/videoio/VideoIOFormat.cpp" + "${APP_DIR}/videoio/VideoIOFormat.h" + "${APP_DIR}/videoio/VideoIOTypes.h" + "${APP_DIR}/videoio/VideoPlayoutPolicy.h" + "${APP_DIR}/videoio/VideoPlayoutScheduler.cpp" + "${APP_DIR}/videoio/VideoPlayoutScheduler.h" + "${PROBE_APP_DIR}/DeckLinkRenderCadenceProbe.cpp" +) + +target_include_directories(DeckLinkRenderCadenceProbe PRIVATE + "${APP_DIR}" + "${APP_DIR}/gl/renderer" + "${APP_DIR}/videoio" + "${APP_DIR}/videoio/decklink" +) + +target_link_libraries(DeckLinkRenderCadenceProbe PRIVATE + opengl32 + Ole32 +) + +target_compile_definitions(DeckLinkRenderCadenceProbe PRIVATE + _UNICODE + UNICODE +) + +if(MSVC) + target_compile_options(DeckLinkRenderCadenceProbe PRIVATE /W3) +endif() + add_executable(RuntimeJsonTests "${APP_DIR}/runtime/support/RuntimeJson.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/tests/RuntimeJsonTests.cpp" diff --git a/apps/DeckLinkRenderCadenceProbe/DeckLinkRenderCadenceProbe.cpp 
b/apps/DeckLinkRenderCadenceProbe/DeckLinkRenderCadenceProbe.cpp new file mode 100644 index 0000000..2942e31 --- /dev/null +++ b/apps/DeckLinkRenderCadenceProbe/DeckLinkRenderCadenceProbe.cpp @@ -0,0 +1,920 @@ +#include "DeckLinkSession.h" +#include "GLExtensions.h" +#include "VideoIOFormat.h" +#include "VideoPlayoutPolicy.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ +constexpr unsigned kDefaultWidth = 1920; +constexpr unsigned kDefaultHeight = 1080; +constexpr std::size_t kSystemFrameSlots = 12; +constexpr std::size_t kPboDepth = 6; +constexpr std::size_t kWarmupFrames = 4; +constexpr std::size_t kDeckLinkTargetBufferedFrames = 4; + +enum class ProbeSlotState +{ + Free, + Rendering, + Completed, + Scheduled +}; + +struct ProbeFrame +{ + void* bytes = nullptr; + long rowBytes = 0; + unsigned width = 0; + unsigned height = 0; + VideoIOPixelFormat pixelFormat = VideoIOPixelFormat::Bgra8; + std::size_t index = 0; + uint64_t generation = 0; + uint64_t frameIndex = 0; +}; + +struct ProbeMetrics +{ + uint64_t renderedFrames = 0; + uint64_t completedFrames = 0; + uint64_t scheduledFrames = 0; + uint64_t completedDrops = 0; + uint64_t acquireMisses = 0; + uint64_t scheduleUnderruns = 0; + uint64_t pboQueueMisses = 0; + std::size_t freeCount = 0; + std::size_t renderingCount = 0; + std::size_t completedCount = 0; + std::size_t scheduledCount = 0; +}; + +class LatestFrameStore +{ +public: + LatestFrameStore(unsigned width, unsigned height, std::size_t capacity) : + mWidth(width), + mHeight(height), + mRowBytes(VideoIORowBytes(VideoIOPixelFormat::Bgra8, width)) + { + mSlots.resize(capacity); + const std::size_t byteCount = static_cast(mRowBytes) * static_cast(mHeight); + for (Slot& slot : mSlots) + { + slot.bytes.resize(byteCount); + slot.generation = 1; + } + } + + bool AcquireForRender(ProbeFrame& frame) + { + std::lock_guard lock(mMutex); + if 
(!AcquireFreeLocked(frame)) + { + if (!DropOldestCompletedLocked() || !AcquireFreeLocked(frame)) + { + ++mMetrics.acquireMisses; + return false; + } + } + return true; + } + + bool PublishCompleted(const ProbeFrame& frame) + { + std::lock_guard lock(mMutex); + if (!IsValidLocked(frame)) + return false; + Slot& slot = mSlots[frame.index]; + if (slot.state != ProbeSlotState::Rendering) + return false; + slot.state = ProbeSlotState::Completed; + slot.frameIndex = frame.frameIndex; + mCompletedIndices.push_back(frame.index); + ++mMetrics.completedFrames; + mCondition.notify_all(); + return true; + } + + bool ConsumeCompleted(ProbeFrame& frame) + { + std::lock_guard lock(mMutex); + while (!mCompletedIndices.empty()) + { + const std::size_t index = mCompletedIndices.front(); + mCompletedIndices.pop_front(); + if (index >= mSlots.size() || mSlots[index].state != ProbeSlotState::Completed) + continue; + mSlots[index].state = ProbeSlotState::Scheduled; + FillFrameLocked(index, frame); + ++mMetrics.scheduledFrames; + return true; + } + ++mMetrics.scheduleUnderruns; + return false; + } + + bool ReleaseByBytes(void* bytes) + { + if (bytes == nullptr) + return false; + std::lock_guard lock(mMutex); + for (std::size_t index = 0; index < mSlots.size(); ++index) + { + if (mSlots[index].bytes.data() != bytes) + continue; + mSlots[index].state = ProbeSlotState::Free; + ++mSlots[index].generation; + RemoveCompletedIndexLocked(index); + mCondition.notify_all(); + return true; + } + return false; + } + + bool WaitForCompletedDepth(std::size_t targetDepth, std::chrono::milliseconds timeout) + { + std::unique_lock lock(mMutex); + return mCondition.wait_for(lock, timeout, [&]() { + return CompletedCountLocked() >= targetDepth; + }); + } + + ProbeMetrics Metrics() const + { + std::lock_guard lock(mMutex); + ProbeMetrics metrics = mMetrics; + for (const Slot& slot : mSlots) + { + switch (slot.state) + { + case ProbeSlotState::Free: + ++metrics.freeCount; + break; + case 
ProbeSlotState::Rendering: + ++metrics.renderingCount; + break; + case ProbeSlotState::Completed: + ++metrics.completedCount; + break; + case ProbeSlotState::Scheduled: + ++metrics.scheduledCount; + break; + } + } + return metrics; + } + + void CountRenderedFrame() + { + std::lock_guard lock(mMutex); + ++mMetrics.renderedFrames; + } + + void CountPboQueueMiss() + { + std::lock_guard lock(mMutex); + ++mMetrics.pboQueueMisses; + } + +private: + struct Slot + { + std::vector bytes; + ProbeSlotState state = ProbeSlotState::Free; + uint64_t generation = 1; + uint64_t frameIndex = 0; + }; + + bool AcquireFreeLocked(ProbeFrame& frame) + { + for (std::size_t index = 0; index < mSlots.size(); ++index) + { + if (mSlots[index].state != ProbeSlotState::Free) + continue; + mSlots[index].state = ProbeSlotState::Rendering; + ++mSlots[index].generation; + FillFrameLocked(index, frame); + return true; + } + return false; + } + + bool DropOldestCompletedLocked() + { + while (!mCompletedIndices.empty()) + { + const std::size_t index = mCompletedIndices.front(); + mCompletedIndices.pop_front(); + if (index >= mSlots.size() || mSlots[index].state != ProbeSlotState::Completed) + continue; + mSlots[index].state = ProbeSlotState::Free; + ++mSlots[index].generation; + ++mMetrics.completedDrops; + return true; + } + return false; + } + + void FillFrameLocked(std::size_t index, ProbeFrame& frame) const + { + const Slot& slot = mSlots[index]; + frame.bytes = const_cast(slot.bytes.data()); + frame.rowBytes = static_cast(mRowBytes); + frame.width = mWidth; + frame.height = mHeight; + frame.pixelFormat = VideoIOPixelFormat::Bgra8; + frame.index = index; + frame.generation = slot.generation; + frame.frameIndex = slot.frameIndex; + } + + bool IsValidLocked(const ProbeFrame& frame) const + { + return frame.index < mSlots.size() && mSlots[frame.index].generation == frame.generation; + } + + void RemoveCompletedIndexLocked(std::size_t index) + { + 
mCompletedIndices.erase(std::remove(mCompletedIndices.begin(), mCompletedIndices.end(), index), mCompletedIndices.end());
+    }
+
+    // Counts the slots currently in the Completed state. Caller must hold mMutex.
+    std::size_t CompletedCountLocked() const
+    {
+        std::size_t count = 0;
+        for (const Slot& slot : mSlots)
+        {
+            if (slot.state == ProbeSlotState::Completed)
+                ++count;
+        }
+        return count;
+    }
+
+    unsigned mWidth = 0;
+    unsigned mHeight = 0;
+    unsigned mRowBytes = 0;
+    std::vector mSlots;
+    std::deque mCompletedIndices;
+    mutable std::mutex mMutex;
+    std::condition_variable mCondition;
+    ProbeMetrics mMetrics;
+};
+
+// Window procedure for the probe's window class: every message goes to the default handler.
+// The class is registered with RegisterClassA, so the window is an ANSI window and must
+// forward to DefWindowProcA. This target is compiled with UNICODE defined, so the generic
+// DefWindowProc macro would resolve to DefWindowProcW and misread ANSI text parameters
+// (for example the window title) as UTF-16.
+LRESULT CALLBACK ProbeWindowProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam)
+{
+    return DefWindowProcA(hwnd, message, wParam, lParam);
+}
+
+// Owns a Win32 window, its device context and a WGL rendering context.
+// Everything that was created is released by Destroy(), which the destructor calls.
+class HiddenOpenGLContext
+{
+public:
+    ~HiddenOpenGLContext()
+    {
+        Destroy();
+    }
+
+    // Registers the window class, creates the window, selects a 32-bit RGBA
+    // double-buffered pixel format and creates the WGL context.
+    // Returns false and fills `error` on the first step that fails; whatever was
+    // created up to that point stays owned by this object until Destroy() runs.
+    bool Create(unsigned width, unsigned height, std::string& error)
+    {
+        mInstance = GetModuleHandle(nullptr);
+        WNDCLASSA wc = {};
+        wc.style = CS_OWNDC;
+        wc.lpfnWndProc = ProbeWindowProc;
+        wc.hInstance = mInstance;
+        wc.lpszClassName = "DeckLinkRenderCadenceProbeWindow";
+        RegisterClassA(&wc);
+
+        mWindow = CreateWindowA(
+            wc.lpszClassName,
+            "DeckLink Render Cadence Probe",
+            WS_OVERLAPPEDWINDOW,
+            CW_USEDEFAULT,
+            CW_USEDEFAULT,
+            static_cast(width),
+            static_cast(height),
+            nullptr,
+            nullptr,
+            mInstance,
+            nullptr);
+        if (!mWindow)
+        {
+            error = "CreateWindowA failed.";
+            return false;
+        }
+
+        mDc = GetDC(mWindow);
+        if (!mDc)
+        {
+            error = "GetDC failed.";
+            return false;
+        }
+
+        PIXELFORMATDESCRIPTOR pfd = {};
+        pfd.nSize = sizeof(pfd);
+        pfd.nVersion = 1;
+        pfd.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER;
+        pfd.iPixelType = PFD_TYPE_RGBA;
+        pfd.cColorBits = 32;
+        pfd.cDepthBits = 0;
+        pfd.iLayerType = PFD_MAIN_PLANE;
+
+        const int pixelFormat = ChoosePixelFormat(mDc, &pfd);
+        if (pixelFormat == 0 || !SetPixelFormat(mDc, pixelFormat, &pfd))
+        {
+            error = "Could not choose/set a pixel format.";
+            return false;
+        }
+
+        mGlrc = wglCreateContext(mDc);
+        if 
(!mGlrc)
+        {
+            error = "wglCreateContext failed.";
+            return false;
+        }
+        return true;
+    }
+
+    // Binds the context to the calling thread; false if it was never created or the bind fails.
+    bool MakeCurrent()
+    {
+        return mDc && mGlrc && wglMakeCurrent(mDc, mGlrc);
+    }
+
+    // Unbinds whatever WGL context is current on the calling thread.
+    void ClearCurrent()
+    {
+        wglMakeCurrent(nullptr, nullptr);
+    }
+
+    // Releases the WGL context, the device context and the window, in that order.
+    // Each handle is nulled after release, so calling this again is harmless.
+    void Destroy()
+    {
+        ClearCurrent();
+        if (mGlrc)
+        {
+            wglDeleteContext(mGlrc);
+            mGlrc = nullptr;
+        }
+        if (mWindow && mDc)
+        {
+            ReleaseDC(mWindow, mDc);
+            mDc = nullptr;
+        }
+        if (mWindow)
+        {
+            DestroyWindow(mWindow);
+            mWindow = nullptr;
+        }
+    }
+
+private:
+    HINSTANCE mInstance = nullptr;
+    HWND mWindow = nullptr;
+    HDC mDc = nullptr;
+    HGLRC mGlrc = nullptr;
+};
+
+// Owns the render thread. The thread creates its own GL context, renders the test
+// pattern once per frame duration and hands readbacks to the LatestFrameStore.
+class RenderCadenceProbe
+{
+public:
+    // frameDurationMs is the target interval between rendered frames; a non-positive
+    // value (after conversion to the clock's duration) falls back to 16 ms.
+    RenderCadenceProbe(LatestFrameStore& frameStore, unsigned width, unsigned height, double frameDurationMs) :
+        mFrameStore(frameStore),
+        mWidth(width),
+        mHeight(height),
+        mFrameDuration(std::chrono::duration_cast(std::chrono::duration(frameDurationMs)))
+    {
+        if (mFrameDuration <= Clock::duration::zero())
+            mFrameDuration = std::chrono::milliseconds(16);
+    }
+
+    // Joins the render thread if it is still joinable, so destroying the probe can
+    // never destroy a joinable std::thread (which would call std::terminate).
+    ~RenderCadenceProbe()
+    {
+        Stop();
+    }
+
+    // Launches the render thread and waits up to 3 seconds for it to report that GL
+    // start-up succeeded or failed.
+    // Returns true once the thread is rendering. On failure fills `error`, stops and
+    // joins the thread, and returns false, so the caller may simply discard the probe.
+    bool Start(std::string& error)
+    {
+        mStopping = false;
+        mThread = std::thread([this]() { ThreadMain(); });
+        std::unique_lock lock(mStartupMutex);
+        if (!mStartupCondition.wait_for(lock, std::chrono::seconds(3), [this]() { return mStarted || !mStartupError.empty(); }))
+        {
+            error = "Timed out starting render thread.";
+            // Unlock before joining: the thread still has to take mStartupMutex to
+            // signal start-up, so joining while holding it would deadlock.
+            // The join waits for start-up to finish before the thread sees mStopping.
+            lock.unlock();
+            Stop();
+            return false;
+        }
+        if (!mStartupError.empty())
+        {
+            error = mStartupError;
+            // The thread has already returned after signalling the failure; join it so
+            // mThread is no longer joinable when this object is destroyed.
+            lock.unlock();
+            Stop();
+            return false;
+        }
+        return true;
+    }
+
+    // Asks the render loop to exit and joins the thread. Safe to call more than once.
+    void Stop()
+    {
+        mStopping = true;
+        if (mThread.joinable())
+            mThread.join();
+    }
+
+private:
+    // One entry of the readback ring: a pixel-pack buffer plus the fence that marks
+    // completion of the read queued into it.
+    struct PboSlot
+    {
+        GLuint pbo = 0;
+        GLsync fence = nullptr;
+        bool inFlight = false;
+        uint64_t frameIndex = 0;
+    };
+
+    using Clock = std::chrono::steady_clock;
+
+    void ThreadMain()
+    {
+        std::string error;
+        HiddenOpenGLContext context;
+        if (!context.Create(mWidth, mHeight, error) || !context.MakeCurrent())
+        {
+            SignalStartupFailure(error.empty() ? "OpenGL context creation failed."
: error); + return; + } + if (!ResolveGLExtensions()) + { + SignalStartupFailure("OpenGL extension resolution failed."); + return; + } + if (!CreateRenderTargets()) + { + SignalStartupFailure("OpenGL render target creation failed."); + return; + } + CreatePbos(); + SignalStarted(); + + auto nextRenderTime = Clock::now(); + while (!mStopping) + { + ConsumeCompletedPbos(); + + const auto now = Clock::now(); + if (now < nextRenderTime) + { + std::this_thread::sleep_for((std::min)(std::chrono::milliseconds(1), std::chrono::duration_cast(nextRenderTime - now))); + continue; + } + + RenderPattern(mFrameIndex); + if (!QueueReadback(mFrameIndex)) + mFrameStore.CountPboQueueMiss(); + mFrameStore.CountRenderedFrame(); + ++mFrameIndex; + nextRenderTime += mFrameDuration; + if (Clock::now() - nextRenderTime > mFrameDuration * 4) + nextRenderTime = Clock::now() + mFrameDuration; + } + + FlushPbos(); + DestroyPbos(); + DestroyRenderTargets(); + context.ClearCurrent(); + } + + bool CreateRenderTargets() + { + glGenFramebuffers(1, &mFramebuffer); + glBindFramebuffer(GL_FRAMEBUFFER, mFramebuffer); + glGenTextures(1, &mTexture); + glBindTexture(GL_TEXTURE_2D, mTexture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, static_cast(mWidth), static_cast(mHeight), 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mTexture, 0); + const bool complete = glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE; + glBindTexture(GL_TEXTURE_2D, 0); + glBindFramebuffer(GL_FRAMEBUFFER, 0); + return complete; + } + + void DestroyRenderTargets() + { + if (mFramebuffer != 0) + glDeleteFramebuffers(1, &mFramebuffer); + if (mTexture != 0) + 
glDeleteTextures(1, &mTexture); + mFramebuffer = 0; + mTexture = 0; + } + + void CreatePbos() + { + mPbos.resize(kPboDepth); + const std::size_t byteCount = static_cast(VideoIORowBytes(VideoIOPixelFormat::Bgra8, mWidth)) * mHeight; + for (PboSlot& slot : mPbos) + { + glGenBuffers(1, &slot.pbo); + glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pbo); + glBufferData(GL_PIXEL_PACK_BUFFER, static_cast(byteCount), nullptr, GL_STREAM_READ); + } + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + } + + void DestroyPbos() + { + for (PboSlot& slot : mPbos) + { + if (slot.fence) + glDeleteSync(slot.fence); + if (slot.pbo != 0) + glDeleteBuffers(1, &slot.pbo); + slot = {}; + } + mPbos.clear(); + } + + void FlushPbos() + { + for (std::size_t i = 0; i < mPbos.size() * 2; ++i) + ConsumeCompletedPbos(); + } + + void RenderPattern(uint64_t frameIndex) + { + const float t = static_cast(frameIndex) / 60.0f; + const float red = 0.1f + 0.4f * (0.5f + 0.5f * std::sin(t)); + const float green = 0.1f + 0.4f * (0.5f + 0.5f * std::sin(t * 0.73f + 1.0f)); + const float blue = 0.15f + 0.3f * (0.5f + 0.5f * std::sin(t * 0.41f + 2.0f)); + + glBindFramebuffer(GL_FRAMEBUFFER, mFramebuffer); + glViewport(0, 0, static_cast(mWidth), static_cast(mHeight)); + glDisable(GL_SCISSOR_TEST); + glClearColor(red, green, blue, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); + + const int boxWidth = static_cast(mWidth / 6); + const int boxHeight = static_cast(mHeight / 5); + const float phase = 0.5f + 0.5f * std::sin(t * 1.7f); + const int x = static_cast(phase * static_cast(mWidth - boxWidth)); + const int y = static_cast((0.5f + 0.5f * std::sin(t * 1.1f + 0.8f)) * static_cast(mHeight - boxHeight)); + + glEnable(GL_SCISSOR_TEST); + glScissor(x, y, boxWidth, boxHeight); + glClearColor(1.0f - red, 0.85f, 0.15f + blue, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); + glDisable(GL_SCISSOR_TEST); + } + + bool QueueReadback(uint64_t frameIndex) + { + if (mPbos.empty()) + return false; + + PboSlot& slot = mPbos[mWriteIndex]; + if (slot.inFlight) + 
return false;
+
+        const std::size_t byteCount = static_cast(VideoIORowBytes(VideoIOPixelFormat::Bgra8, mWidth)) * mHeight;
+        glBindFramebuffer(GL_READ_FRAMEBUFFER, mFramebuffer);
+        glPixelStorei(GL_PACK_ALIGNMENT, 4);
+        glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+        glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pbo);
+        glBufferData(GL_PIXEL_PACK_BUFFER, static_cast(byteCount), nullptr, GL_STREAM_READ);
+        // With a pack buffer bound, the null pointer is an offset into the PBO, not a client address.
+        glReadPixels(0, 0, static_cast(mWidth), static_cast(mHeight), GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr);
+        slot.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+        slot.inFlight = slot.fence != nullptr;
+        slot.frameIndex = frameIndex;
+        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+        mWriteIndex = (mWriteIndex + 1) % mPbos.size();
+        return slot.inFlight;
+    }
+
+    // Drains finished readbacks from the PBO ring, starting at mReadIndex.
+    // Slots that are not in flight are skipped. The fence of an in-flight slot is polled
+    // with a zero timeout; at the first fence that has not signalled the function returns.
+    // A finished readback is copied into a frame-store slot and published. If the store
+    // has no slot to give, or the buffer cannot be mapped, that readback is discarded.
+    void ConsumeCompletedPbos()
+    {
+        for (std::size_t checked = 0; checked < mPbos.size(); ++checked)
+        {
+            PboSlot& slot = mPbos[mReadIndex];
+            if (!slot.inFlight || slot.fence == nullptr)
+            {
+                mReadIndex = (mReadIndex + 1) % mPbos.size();
+                continue;
+            }
+
+            const GLenum waitResult = glClientWaitSync(slot.fence, 0, 0);
+            if (waitResult != GL_ALREADY_SIGNALED && waitResult != GL_CONDITION_SATISFIED)
+                return;
+
+            ProbeFrame frame;
+            if (mFrameStore.AcquireForRender(frame))
+            {
+                glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pbo);
+                void* mapped = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
+                if (mapped)
+                {
+                    const std::size_t byteCount = static_cast(frame.rowBytes) * frame.height;
+                    std::memcpy(frame.bytes, mapped, byteCount);
+                    glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
+                    frame.frameIndex = slot.frameIndex;
+                    mFrameStore.PublishCompleted(frame);
+                }
+                else
+                {
+                    // Mapping failed, so nothing will publish this slot. Hand it back;
+                    // otherwise it would stay in the Rendering state forever and the
+                    // store would permanently lose one of its slots.
+                    mFrameStore.ReleaseByBytes(frame.bytes);
+                }
+                glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+            }
+
+            glDeleteSync(slot.fence);
+            slot.fence = nullptr;
+            slot.inFlight = false;
+            mReadIndex = (mReadIndex + 1) % mPbos.size();
+        }
+    }
+
+    // Tells Start() that GL start-up succeeded.
+    void SignalStarted()
+    {
+        std::lock_guard lock(mStartupMutex);
+        mStarted = true;
+        mStartupCondition.notify_all();
+    }
+
+    // Tells Start() that GL start-up failed and why.
+    void SignalStartupFailure(const std::string& error)
+    {
+        
std::lock_guard lock(mStartupMutex); + mStartupError = error; + mStartupCondition.notify_all(); + } + + LatestFrameStore& mFrameStore; + unsigned mWidth = 0; + unsigned mHeight = 0; + Clock::duration mFrameDuration; + std::thread mThread; + std::atomic mStopping{ false }; + std::mutex mStartupMutex; + std::condition_variable mStartupCondition; + bool mStarted = false; + std::string mStartupError; + GLuint mFramebuffer = 0; + GLuint mTexture = 0; + std::vector mPbos; + std::size_t mWriteIndex = 0; + std::size_t mReadIndex = 0; + uint64_t mFrameIndex = 0; +}; + +class DeckLinkProbePlayout +{ +public: + DeckLinkProbePlayout(DeckLinkSession& session, LatestFrameStore& frameStore) : + mSession(session), + mFrameStore(frameStore) + { + } + + bool Start() + { + mStopping = false; + mThread = std::thread([this]() { ThreadMain(); }); + return true; + } + + void Stop() + { + mStopping = true; + if (mThread.joinable()) + mThread.join(); + } + + void ThreadMain() + { + while (!mStopping) + { + const ProbeMetrics metrics = mFrameStore.Metrics(); + if (metrics.scheduledCount >= kDeckLinkTargetBufferedFrames) + { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + continue; + } + + ProbeFrame frame; + if (!mFrameStore.ConsumeCompleted(frame)) + { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + continue; + } + + VideoIOOutputFrame outputFrame; + outputFrame.bytes = frame.bytes; + outputFrame.nativeBuffer = frame.bytes; + outputFrame.rowBytes = frame.rowBytes; + outputFrame.width = frame.width; + outputFrame.height = frame.height; + outputFrame.pixelFormat = frame.pixelFormat; + + if (!mSession.ScheduleOutputFrame(outputFrame)) + { + mFrameStore.ReleaseByBytes(frame.bytes); + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + } + } + +private: + DeckLinkSession& mSession; + LatestFrameStore& mFrameStore; + std::thread mThread; + std::atomic mStopping{ false }; +}; + +std::string CompletionResultToString(VideoIOCompletionResult result) +{ + 
switch (result) + { + case VideoIOCompletionResult::Completed: + return "completed"; + case VideoIOCompletionResult::DisplayedLate: + return "late"; + case VideoIOCompletionResult::Dropped: + return "dropped"; + case VideoIOCompletionResult::Flushed: + return "flushed"; + case VideoIOCompletionResult::Unknown: + default: + return "unknown"; + } +} + +void PrintUsage() +{ + std::cout << "DeckLinkRenderCadenceProbe\n" + << " Renders a simple OpenGL BGRA8 motion pattern on one GL thread,\n" + << " copies completed PBO readbacks into latest-N system memory slots,\n" + << " warms up rendered frames, then feeds DeckLink scheduled playback.\n\n" + << "Press Enter to stop.\n"; +} + +class ComInitGuard +{ +public: + ~ComInitGuard() + { + if (mInitialized) + CoUninitialize(); + } + + bool Initialize() + { + const HRESULT result = CoInitialize(nullptr); + mInitialized = SUCCEEDED(result); + mResult = result; + return mInitialized; + } + + HRESULT Result() const { return mResult; } + +private: + bool mInitialized = false; + HRESULT mResult = S_OK; +}; +} + +int main() +{ + PrintUsage(); + + ComInitGuard com; + if (!com.Initialize()) + { + std::cerr << "COM initialization failed: 0x" << std::hex << com.Result() << std::dec << "\n"; + return 1; + } + + LatestFrameStore frameStore(kDefaultWidth, kDefaultHeight, kSystemFrameSlots); + DeckLinkSession deckLink; + std::atomic completions{ 0 }; + std::atomic late{ 0 }; + std::atomic dropped{ 0 }; + + VideoFormatSelection formats; + std::string error; + if (!deckLink.DiscoverDevicesAndModes(formats, error)) + { + std::cerr << "DeckLink discovery failed: " << error << "\n"; + return 1; + } + if (!deckLink.SelectPreferredFormats(formats, false, error)) + { + std::cerr << "DeckLink format selection failed: " << error << "\n"; + return 1; + } + if (!deckLink.ConfigureOutput( + [&](const VideoIOCompletion& completion) { + frameStore.ReleaseByBytes(completion.outputFrameBuffer); + ++completions; + if (completion.result == 
VideoIOCompletionResult::DisplayedLate) + ++late; + else if (completion.result == VideoIOCompletionResult::Dropped) + ++dropped; + }, + formats.output, + false, + error)) + { + std::cerr << "DeckLink output configuration failed: " << error << "\n"; + return 1; + } + if (!deckLink.PrepareOutputSchedule()) + { + std::cerr << "DeckLink schedule preparation failed.\n"; + return 1; + } + + const VideoIOState& state = deckLink.State(); + if (state.outputFrameSize.width != kDefaultWidth || state.outputFrameSize.height != kDefaultHeight) + { + std::cerr << "This probe currently expects 1920x1080 output. Selected mode is " + << state.outputFrameSize.width << "x" << state.outputFrameSize.height << ".\n"; + return 1; + } + + RenderCadenceProbe renderer(frameStore, state.outputFrameSize.width, state.outputFrameSize.height, state.frameBudgetMilliseconds); + if (!renderer.Start(error)) + { + std::cerr << "Render thread start failed: " << error << "\n"; + return 1; + } + + std::cout << "Warming up " << kWarmupFrames << " rendered frames at cadence...\n"; + if (!frameStore.WaitForCompletedDepth(kWarmupFrames, std::chrono::seconds(3))) + { + std::cerr << "Timed out waiting for rendered warmup frames.\n"; + renderer.Stop(); + return 1; + } + + DeckLinkProbePlayout playout(deckLink, frameStore); + playout.Start(); + + const auto prerollDeadline = std::chrono::steady_clock::now() + std::chrono::seconds(3); + while (std::chrono::steady_clock::now() < prerollDeadline) + { + if (frameStore.Metrics().scheduledCount >= kDeckLinkTargetBufferedFrames) + break; + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + } + + if (!deckLink.StartScheduledPlayback()) + { + std::cerr << "DeckLink scheduled playback failed to start.\n"; + playout.Stop(); + renderer.Stop(); + return 1; + } + + std::atomic metricsStopping{ false }; + std::thread metricsThread([&]() { + uint64_t lastRendered = 0; + uint64_t lastScheduled = 0; + auto lastTime = std::chrono::steady_clock::now(); + while 
(!metricsStopping) + { + std::this_thread::sleep_for(std::chrono::seconds(1)); + const auto now = std::chrono::steady_clock::now(); + const double seconds = std::chrono::duration_cast>(now - lastTime).count(); + const ProbeMetrics metrics = frameStore.Metrics(); + const double renderFps = seconds > 0.0 ? static_cast(metrics.renderedFrames - lastRendered) / seconds : 0.0; + const double scheduleFps = seconds > 0.0 ? static_cast(metrics.scheduledFrames - lastScheduled) / seconds : 0.0; + lastRendered = metrics.renderedFrames; + lastScheduled = metrics.scheduledFrames; + lastTime = now; + + std::cout << std::fixed << std::setprecision(1) + << "renderFps=" << renderFps + << " scheduleFps=" << scheduleFps + << " free=" << metrics.freeCount + << " completed=" << metrics.completedCount + << " scheduled=" << metrics.scheduledCount + << " drops=" << metrics.completedDrops + << " pboMiss=" << metrics.pboQueueMisses + << " completions=" << completions.load() + << " late=" << late.load() + << " dropped=" << dropped.load() + << " decklinkBuffered=" << deckLink.State().actualDeckLinkBufferedFrames + << "\n"; + } + }); + + std::string line; + std::getline(std::cin, line); + + metricsStopping = true; + if (metricsThread.joinable()) + metricsThread.join(); + playout.Stop(); + deckLink.Stop(); + renderer.Stop(); + deckLink.ReleaseResources(); + return 0; +} diff --git a/apps/DeckLinkRenderCadenceProbe/README.md b/apps/DeckLinkRenderCadenceProbe/README.md new file mode 100644 index 0000000..6823074 --- /dev/null +++ b/apps/DeckLinkRenderCadenceProbe/README.md @@ -0,0 +1,113 @@ +# DeckLink Render Cadence Probe + +This is a deliberately small architecture probe for the Phase 7.7 playout model. + +It is not the main app and does not use the main runtime, shader stack, preview path, input upload path, or render engine. 
+ +## What It Tests + +The probe validates the clean playout spine: + +```text +single OpenGL render thread + owns its own hidden GL context + renders a simple moving BGRA8 pattern at output cadence + queues GPU readback through a PBO ring + copies completed readbacks into latest-N system-memory slots + +system-memory frame store + owns free / rendering / completed / scheduled slots + drops old completed unscheduled frames when render cadence needs space + protects scheduled frames until DeckLink completion + +DeckLink playout thread + consumes completed system-memory frames + keeps a small scheduled buffer filled + does not render +``` + +Startup warms up rendered frames before starting DeckLink scheduled playback. + +## How To Build + +```powershell +cmake --build --preset build-debug --target DeckLinkRenderCadenceProbe -- /m:1 +``` + +The executable is: + +```text +build\vs2022-x64-debug\Debug\DeckLinkRenderCadenceProbe.exe +``` + +## How To Run + +Run it from a terminal so you can see the telemetry: + +```powershell +build\vs2022-x64-debug\Debug\DeckLinkRenderCadenceProbe.exe +``` + +Press Enter to stop. + +The first version assumes `1080p59.94` / `1920x1080` output and BGRA8 system-memory frames. 
+
+## What To Watch
+
+The probe prints one line per second:
+
+- `renderFps`: cadence render throughput
+- `scheduleFps`: DeckLink scheduling throughput
+- `free`: free system-memory slots
+- `completed`: rendered, unscheduled slots
+- `scheduled`: slots currently owned by DeckLink
+- `drops`: old completed unscheduled frames recycled by the latest-N cache
+- `pboMiss`: PBO ring was full when trying to queue readback
+- `completions`: total DeckLink frame-completion callbacks received since startup
+- `late`: DeckLink displayed-late completions
+- `dropped`: DeckLink dropped completions
+- `decklinkBuffered`: actual DeckLink buffered-frame count when available
+
+For a healthy architecture proof, expect:
+
+- `renderFps` close to the selected output cadence
+- `scheduleFps` close to the selected output cadence after warmup
+- `scheduled` hovering near the target buffer depth
+- `late` and `dropped` not increasing continuously
+- visible motion that is smooth on the DeckLink output
+
+## Interpretation
+
+If this probe is smooth at 59.94/60, the broad architecture is viable and the main app's remaining stutters are likely caused by integration details such as input upload, shared render-thread work, preview/screenshot work, or runtime/render-state coupling.
+
+If this probe is not smooth, the problem is lower level: DeckLink scheduling, OpenGL readback, Windows scheduling, or hardware/driver behavior.
+ +## Initial Result + +Date: 2026-05-12 + +User-visible result: + +- output looked smooth + +Representative telemetry: + +```text +renderFps=59.9 scheduleFps=59.9 free=7 completed=1 scheduled=4 drops=0 pboMiss=0 completions=119 late=0 dropped=0 decklinkBuffered=4 +renderFps=59.9 scheduleFps=59.9 free=7 completed=1 scheduled=4 drops=0 pboMiss=0 completions=179 late=0 dropped=0 decklinkBuffered=4 +renderFps=59.8 scheduleFps=59.8 free=7 completed=1 scheduled=4 drops=0 pboMiss=0 completions=239 late=0 dropped=0 decklinkBuffered=4 +renderFps=60.8 scheduleFps=59.8 free=7 completed=1 scheduled=4 drops=0 pboMiss=0 completions=299 late=0 dropped=0 decklinkBuffered=4 +renderFps=59.9 scheduleFps=59.9 free=7 completed=1 scheduled=4 drops=0 pboMiss=0 completions=360 late=0 dropped=0 decklinkBuffered=4 +renderFps=59.8 scheduleFps=60.8 free=8 completed=0 scheduled=4 drops=0 pboMiss=0 completions=420 late=0 dropped=0 decklinkBuffered=4 +``` + +Read: + +- the clean architecture can sustain the selected output cadence on the test machine +- BGRA8 PBO readback is viable when isolated from the main app's other render-thread work +- latest-N system-memory buffering stayed stable +- DeckLink actual buffered depth stayed at 4 +- there were no late frames, dropped frames, completed-frame drops, or PBO misses in the sampled output + +Implication: + +The main app's remaining stutters are likely integration/ownership issues rather than a fundamental DeckLink/OpenGL/BGRA8 readback limit. The highest-value suspects are input upload before output render, shared render-thread queue contention, preview/screenshot work, and runtime/render-state work on the output path.