Compare commits
3 Commits
f1f4e3421b
...
old-app
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2531d871e8 | ||
|
|
709d3d3fa4 | ||
|
|
ea31d0ca13 |
24
.vscode/launch.json
vendored
24
.vscode/launch.json
vendored
@@ -9,7 +9,12 @@
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
|
||||
"environment": [],
|
||||
"environment": [
|
||||
{
|
||||
"name": "VST_DISABLE_INPUT_CAPTURE",
|
||||
"value": "1"
|
||||
}
|
||||
],
|
||||
"console": "internalConsole",
|
||||
"symbolSearchPath": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
|
||||
"requireExactSource": true,
|
||||
@@ -61,6 +66,23 @@
|
||||
"moduleLoad": true
|
||||
},
|
||||
"preLaunchTask": "Build LoopThroughWithOpenGLCompositing Debug x64"
|
||||
},
|
||||
{
|
||||
"name": "Debug DeckLinkRenderCadenceProbe",
|
||||
"type": "cppvsdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug\\DeckLinkRenderCadenceProbe.exe",
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
|
||||
"environment": [],
|
||||
"console": "externalTerminal",
|
||||
"symbolSearchPath": "${workspaceFolder}\\build\\vs2022-x64-debug\\Debug",
|
||||
"requireExactSource": true,
|
||||
"logging": {
|
||||
"moduleLoad": true
|
||||
},
|
||||
"preLaunchTask": "Build DeckLinkRenderCadenceProbe Debug x64"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
16
.vscode/tasks.json
vendored
16
.vscode/tasks.json
vendored
@@ -36,6 +36,22 @@
|
||||
"group": "build",
|
||||
"problemMatcher": "$msCompile"
|
||||
},
|
||||
{
|
||||
"label": "Build DeckLinkRenderCadenceProbe Debug x64",
|
||||
"type": "process",
|
||||
"command": "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\Common7\\IDE\\CommonExtensions\\Microsoft\\CMake\\CMake\\bin\\cmake.exe",
|
||||
"args": [
|
||||
"--build",
|
||||
"${workspaceFolder}\\build\\vs2022-x64-debug",
|
||||
"--config",
|
||||
"Debug",
|
||||
"--target",
|
||||
"DeckLinkRenderCadenceProbe",
|
||||
"--parallel"
|
||||
],
|
||||
"group": "build",
|
||||
"problemMatcher": "$msCompile"
|
||||
},
|
||||
{
|
||||
"label": "Clean LoopThroughWithOpenGLCompositing Debug x64",
|
||||
"type": "process",
|
||||
|
||||
@@ -229,6 +229,50 @@ if(MSVC)
|
||||
target_compile_options(LoopThroughWithOpenGLCompositing PRIVATE /W3)
|
||||
endif()
|
||||
|
||||
set(PROBE_APP_DIR "${CMAKE_CURRENT_SOURCE_DIR}/apps/DeckLinkRenderCadenceProbe")
|
||||
|
||||
add_executable(DeckLinkRenderCadenceProbe
|
||||
"${APP_DIR}/videoio/decklink/DeckLinkAPI_i.c"
|
||||
"${APP_DIR}/videoio/decklink/DeckLinkDisplayMode.cpp"
|
||||
"${APP_DIR}/videoio/decklink/DeckLinkDisplayMode.h"
|
||||
"${APP_DIR}/videoio/decklink/DeckLinkFrameTransfer.cpp"
|
||||
"${APP_DIR}/videoio/decklink/DeckLinkFrameTransfer.h"
|
||||
"${APP_DIR}/videoio/decklink/DeckLinkSession.cpp"
|
||||
"${APP_DIR}/videoio/decklink/DeckLinkSession.h"
|
||||
"${APP_DIR}/videoio/decklink/DeckLinkVideoIOFormat.cpp"
|
||||
"${APP_DIR}/videoio/decklink/DeckLinkVideoIOFormat.h"
|
||||
"${APP_DIR}/gl/renderer/GLExtensions.cpp"
|
||||
"${APP_DIR}/gl/renderer/GLExtensions.h"
|
||||
"${APP_DIR}/videoio/VideoIOFormat.cpp"
|
||||
"${APP_DIR}/videoio/VideoIOFormat.h"
|
||||
"${APP_DIR}/videoio/VideoIOTypes.h"
|
||||
"${APP_DIR}/videoio/VideoPlayoutPolicy.h"
|
||||
"${APP_DIR}/videoio/VideoPlayoutScheduler.cpp"
|
||||
"${APP_DIR}/videoio/VideoPlayoutScheduler.h"
|
||||
"${PROBE_APP_DIR}/DeckLinkRenderCadenceProbe.cpp"
|
||||
)
|
||||
|
||||
target_include_directories(DeckLinkRenderCadenceProbe PRIVATE
|
||||
"${APP_DIR}"
|
||||
"${APP_DIR}/gl/renderer"
|
||||
"${APP_DIR}/videoio"
|
||||
"${APP_DIR}/videoio/decklink"
|
||||
)
|
||||
|
||||
target_link_libraries(DeckLinkRenderCadenceProbe PRIVATE
|
||||
opengl32
|
||||
Ole32
|
||||
)
|
||||
|
||||
target_compile_definitions(DeckLinkRenderCadenceProbe PRIVATE
|
||||
_UNICODE
|
||||
UNICODE
|
||||
)
|
||||
|
||||
if(MSVC)
|
||||
target_compile_options(DeckLinkRenderCadenceProbe PRIVATE /W3)
|
||||
endif()
|
||||
|
||||
add_executable(RuntimeJsonTests
|
||||
"${APP_DIR}/runtime/support/RuntimeJson.cpp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/tests/RuntimeJsonTests.cpp"
|
||||
|
||||
920
apps/DeckLinkRenderCadenceProbe/DeckLinkRenderCadenceProbe.cpp
Normal file
920
apps/DeckLinkRenderCadenceProbe/DeckLinkRenderCadenceProbe.cpp
Normal file
@@ -0,0 +1,920 @@
|
||||
#include "DeckLinkSession.h"
|
||||
#include "GLExtensions.h"
|
||||
#include "VideoIOFormat.h"
|
||||
#include "VideoPlayoutPolicy.h"
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <condition_variable>
|
||||
#include <cstdint>
|
||||
#include <deque>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
namespace
|
||||
{
|
||||
constexpr unsigned kDefaultWidth = 1920;
|
||||
constexpr unsigned kDefaultHeight = 1080;
|
||||
constexpr std::size_t kSystemFrameSlots = 12;
|
||||
constexpr std::size_t kPboDepth = 6;
|
||||
constexpr std::size_t kWarmupFrames = 4;
|
||||
constexpr std::size_t kDeckLinkTargetBufferedFrames = 4;
|
||||
|
||||
enum class ProbeSlotState
|
||||
{
|
||||
Free,
|
||||
Rendering,
|
||||
Completed,
|
||||
Scheduled
|
||||
};
|
||||
|
||||
struct ProbeFrame
|
||||
{
|
||||
void* bytes = nullptr;
|
||||
long rowBytes = 0;
|
||||
unsigned width = 0;
|
||||
unsigned height = 0;
|
||||
VideoIOPixelFormat pixelFormat = VideoIOPixelFormat::Bgra8;
|
||||
std::size_t index = 0;
|
||||
uint64_t generation = 0;
|
||||
uint64_t frameIndex = 0;
|
||||
};
|
||||
|
||||
struct ProbeMetrics
|
||||
{
|
||||
uint64_t renderedFrames = 0;
|
||||
uint64_t completedFrames = 0;
|
||||
uint64_t scheduledFrames = 0;
|
||||
uint64_t completedDrops = 0;
|
||||
uint64_t acquireMisses = 0;
|
||||
uint64_t scheduleUnderruns = 0;
|
||||
uint64_t pboQueueMisses = 0;
|
||||
std::size_t freeCount = 0;
|
||||
std::size_t renderingCount = 0;
|
||||
std::size_t completedCount = 0;
|
||||
std::size_t scheduledCount = 0;
|
||||
};
|
||||
|
||||
class LatestFrameStore
|
||||
{
|
||||
public:
|
||||
LatestFrameStore(unsigned width, unsigned height, std::size_t capacity) :
|
||||
mWidth(width),
|
||||
mHeight(height),
|
||||
mRowBytes(VideoIORowBytes(VideoIOPixelFormat::Bgra8, width))
|
||||
{
|
||||
mSlots.resize(capacity);
|
||||
const std::size_t byteCount = static_cast<std::size_t>(mRowBytes) * static_cast<std::size_t>(mHeight);
|
||||
for (Slot& slot : mSlots)
|
||||
{
|
||||
slot.bytes.resize(byteCount);
|
||||
slot.generation = 1;
|
||||
}
|
||||
}
|
||||
|
||||
bool AcquireForRender(ProbeFrame& frame)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mMutex);
|
||||
if (!AcquireFreeLocked(frame))
|
||||
{
|
||||
if (!DropOldestCompletedLocked() || !AcquireFreeLocked(frame))
|
||||
{
|
||||
++mMetrics.acquireMisses;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PublishCompleted(const ProbeFrame& frame)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mMutex);
|
||||
if (!IsValidLocked(frame))
|
||||
return false;
|
||||
Slot& slot = mSlots[frame.index];
|
||||
if (slot.state != ProbeSlotState::Rendering)
|
||||
return false;
|
||||
slot.state = ProbeSlotState::Completed;
|
||||
slot.frameIndex = frame.frameIndex;
|
||||
mCompletedIndices.push_back(frame.index);
|
||||
++mMetrics.completedFrames;
|
||||
mCondition.notify_all();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ConsumeCompleted(ProbeFrame& frame)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mMutex);
|
||||
while (!mCompletedIndices.empty())
|
||||
{
|
||||
const std::size_t index = mCompletedIndices.front();
|
||||
mCompletedIndices.pop_front();
|
||||
if (index >= mSlots.size() || mSlots[index].state != ProbeSlotState::Completed)
|
||||
continue;
|
||||
mSlots[index].state = ProbeSlotState::Scheduled;
|
||||
FillFrameLocked(index, frame);
|
||||
++mMetrics.scheduledFrames;
|
||||
return true;
|
||||
}
|
||||
++mMetrics.scheduleUnderruns;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ReleaseByBytes(void* bytes)
|
||||
{
|
||||
if (bytes == nullptr)
|
||||
return false;
|
||||
std::lock_guard<std::mutex> lock(mMutex);
|
||||
for (std::size_t index = 0; index < mSlots.size(); ++index)
|
||||
{
|
||||
if (mSlots[index].bytes.data() != bytes)
|
||||
continue;
|
||||
mSlots[index].state = ProbeSlotState::Free;
|
||||
++mSlots[index].generation;
|
||||
RemoveCompletedIndexLocked(index);
|
||||
mCondition.notify_all();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool WaitForCompletedDepth(std::size_t targetDepth, std::chrono::milliseconds timeout)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mMutex);
|
||||
return mCondition.wait_for(lock, timeout, [&]() {
|
||||
return CompletedCountLocked() >= targetDepth;
|
||||
});
|
||||
}
|
||||
|
||||
ProbeMetrics Metrics() const
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mMutex);
|
||||
ProbeMetrics metrics = mMetrics;
|
||||
for (const Slot& slot : mSlots)
|
||||
{
|
||||
switch (slot.state)
|
||||
{
|
||||
case ProbeSlotState::Free:
|
||||
++metrics.freeCount;
|
||||
break;
|
||||
case ProbeSlotState::Rendering:
|
||||
++metrics.renderingCount;
|
||||
break;
|
||||
case ProbeSlotState::Completed:
|
||||
++metrics.completedCount;
|
||||
break;
|
||||
case ProbeSlotState::Scheduled:
|
||||
++metrics.scheduledCount;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return metrics;
|
||||
}
|
||||
|
||||
void CountRenderedFrame()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mMutex);
|
||||
++mMetrics.renderedFrames;
|
||||
}
|
||||
|
||||
void CountPboQueueMiss()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mMutex);
|
||||
++mMetrics.pboQueueMisses;
|
||||
}
|
||||
|
||||
private:
|
||||
struct Slot
|
||||
{
|
||||
std::vector<unsigned char> bytes;
|
||||
ProbeSlotState state = ProbeSlotState::Free;
|
||||
uint64_t generation = 1;
|
||||
uint64_t frameIndex = 0;
|
||||
};
|
||||
|
||||
bool AcquireFreeLocked(ProbeFrame& frame)
|
||||
{
|
||||
for (std::size_t index = 0; index < mSlots.size(); ++index)
|
||||
{
|
||||
if (mSlots[index].state != ProbeSlotState::Free)
|
||||
continue;
|
||||
mSlots[index].state = ProbeSlotState::Rendering;
|
||||
++mSlots[index].generation;
|
||||
FillFrameLocked(index, frame);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool DropOldestCompletedLocked()
|
||||
{
|
||||
while (!mCompletedIndices.empty())
|
||||
{
|
||||
const std::size_t index = mCompletedIndices.front();
|
||||
mCompletedIndices.pop_front();
|
||||
if (index >= mSlots.size() || mSlots[index].state != ProbeSlotState::Completed)
|
||||
continue;
|
||||
mSlots[index].state = ProbeSlotState::Free;
|
||||
++mSlots[index].generation;
|
||||
++mMetrics.completedDrops;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void FillFrameLocked(std::size_t index, ProbeFrame& frame) const
|
||||
{
|
||||
const Slot& slot = mSlots[index];
|
||||
frame.bytes = const_cast<unsigned char*>(slot.bytes.data());
|
||||
frame.rowBytes = static_cast<long>(mRowBytes);
|
||||
frame.width = mWidth;
|
||||
frame.height = mHeight;
|
||||
frame.pixelFormat = VideoIOPixelFormat::Bgra8;
|
||||
frame.index = index;
|
||||
frame.generation = slot.generation;
|
||||
frame.frameIndex = slot.frameIndex;
|
||||
}
|
||||
|
||||
bool IsValidLocked(const ProbeFrame& frame) const
|
||||
{
|
||||
return frame.index < mSlots.size() && mSlots[frame.index].generation == frame.generation;
|
||||
}
|
||||
|
||||
void RemoveCompletedIndexLocked(std::size_t index)
|
||||
{
|
||||
mCompletedIndices.erase(std::remove(mCompletedIndices.begin(), mCompletedIndices.end(), index), mCompletedIndices.end());
|
||||
}
|
||||
|
||||
std::size_t CompletedCountLocked() const
|
||||
{
|
||||
std::size_t count = 0;
|
||||
for (const Slot& slot : mSlots)
|
||||
{
|
||||
if (slot.state == ProbeSlotState::Completed)
|
||||
++count;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
unsigned mWidth = 0;
|
||||
unsigned mHeight = 0;
|
||||
unsigned mRowBytes = 0;
|
||||
std::vector<Slot> mSlots;
|
||||
std::deque<std::size_t> mCompletedIndices;
|
||||
mutable std::mutex mMutex;
|
||||
std::condition_variable mCondition;
|
||||
ProbeMetrics mMetrics;
|
||||
};
|
||||
|
||||
LRESULT CALLBACK ProbeWindowProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam)
|
||||
{
|
||||
return DefWindowProc(hwnd, message, wParam, lParam);
|
||||
}
|
||||
|
||||
class HiddenOpenGLContext
|
||||
{
|
||||
public:
|
||||
~HiddenOpenGLContext()
|
||||
{
|
||||
Destroy();
|
||||
}
|
||||
|
||||
bool Create(unsigned width, unsigned height, std::string& error)
|
||||
{
|
||||
mInstance = GetModuleHandle(nullptr);
|
||||
WNDCLASSA wc = {};
|
||||
wc.style = CS_OWNDC;
|
||||
wc.lpfnWndProc = ProbeWindowProc;
|
||||
wc.hInstance = mInstance;
|
||||
wc.lpszClassName = "DeckLinkRenderCadenceProbeWindow";
|
||||
RegisterClassA(&wc);
|
||||
|
||||
mWindow = CreateWindowA(
|
||||
wc.lpszClassName,
|
||||
"DeckLink Render Cadence Probe",
|
||||
WS_OVERLAPPEDWINDOW,
|
||||
CW_USEDEFAULT,
|
||||
CW_USEDEFAULT,
|
||||
static_cast<int>(width),
|
||||
static_cast<int>(height),
|
||||
nullptr,
|
||||
nullptr,
|
||||
mInstance,
|
||||
nullptr);
|
||||
if (!mWindow)
|
||||
{
|
||||
error = "CreateWindowA failed.";
|
||||
return false;
|
||||
}
|
||||
|
||||
mDc = GetDC(mWindow);
|
||||
if (!mDc)
|
||||
{
|
||||
error = "GetDC failed.";
|
||||
return false;
|
||||
}
|
||||
|
||||
PIXELFORMATDESCRIPTOR pfd = {};
|
||||
pfd.nSize = sizeof(pfd);
|
||||
pfd.nVersion = 1;
|
||||
pfd.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER;
|
||||
pfd.iPixelType = PFD_TYPE_RGBA;
|
||||
pfd.cColorBits = 32;
|
||||
pfd.cDepthBits = 0;
|
||||
pfd.iLayerType = PFD_MAIN_PLANE;
|
||||
|
||||
const int pixelFormat = ChoosePixelFormat(mDc, &pfd);
|
||||
if (pixelFormat == 0 || !SetPixelFormat(mDc, pixelFormat, &pfd))
|
||||
{
|
||||
error = "Could not choose/set a pixel format.";
|
||||
return false;
|
||||
}
|
||||
|
||||
mGlrc = wglCreateContext(mDc);
|
||||
if (!mGlrc)
|
||||
{
|
||||
error = "wglCreateContext failed.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MakeCurrent()
|
||||
{
|
||||
return mDc && mGlrc && wglMakeCurrent(mDc, mGlrc);
|
||||
}
|
||||
|
||||
void ClearCurrent()
|
||||
{
|
||||
wglMakeCurrent(nullptr, nullptr);
|
||||
}
|
||||
|
||||
void Destroy()
|
||||
{
|
||||
ClearCurrent();
|
||||
if (mGlrc)
|
||||
{
|
||||
wglDeleteContext(mGlrc);
|
||||
mGlrc = nullptr;
|
||||
}
|
||||
if (mWindow && mDc)
|
||||
{
|
||||
ReleaseDC(mWindow, mDc);
|
||||
mDc = nullptr;
|
||||
}
|
||||
if (mWindow)
|
||||
{
|
||||
DestroyWindow(mWindow);
|
||||
mWindow = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
HINSTANCE mInstance = nullptr;
|
||||
HWND mWindow = nullptr;
|
||||
HDC mDc = nullptr;
|
||||
HGLRC mGlrc = nullptr;
|
||||
};
|
||||
|
||||
class RenderCadenceProbe
|
||||
{
|
||||
public:
|
||||
RenderCadenceProbe(LatestFrameStore& frameStore, unsigned width, unsigned height, double frameDurationMs) :
|
||||
mFrameStore(frameStore),
|
||||
mWidth(width),
|
||||
mHeight(height),
|
||||
mFrameDuration(std::chrono::duration_cast<Clock::duration>(std::chrono::duration<double, std::milli>(frameDurationMs)))
|
||||
{
|
||||
if (mFrameDuration <= Clock::duration::zero())
|
||||
mFrameDuration = std::chrono::milliseconds(16);
|
||||
}
|
||||
|
||||
bool Start(std::string& error)
|
||||
{
|
||||
mStopping = false;
|
||||
mThread = std::thread([this]() { ThreadMain(); });
|
||||
std::unique_lock<std::mutex> lock(mStartupMutex);
|
||||
if (!mStartupCondition.wait_for(lock, std::chrono::seconds(3), [this]() { return mStarted || !mStartupError.empty(); }))
|
||||
{
|
||||
error = "Timed out starting render thread.";
|
||||
return false;
|
||||
}
|
||||
if (!mStartupError.empty())
|
||||
{
|
||||
error = mStartupError;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void Stop()
|
||||
{
|
||||
mStopping = true;
|
||||
if (mThread.joinable())
|
||||
mThread.join();
|
||||
}
|
||||
|
||||
private:
|
||||
struct PboSlot
|
||||
{
|
||||
GLuint pbo = 0;
|
||||
GLsync fence = nullptr;
|
||||
bool inFlight = false;
|
||||
uint64_t frameIndex = 0;
|
||||
};
|
||||
|
||||
using Clock = std::chrono::steady_clock;
|
||||
|
||||
void ThreadMain()
|
||||
{
|
||||
std::string error;
|
||||
HiddenOpenGLContext context;
|
||||
if (!context.Create(mWidth, mHeight, error) || !context.MakeCurrent())
|
||||
{
|
||||
SignalStartupFailure(error.empty() ? "OpenGL context creation failed." : error);
|
||||
return;
|
||||
}
|
||||
if (!ResolveGLExtensions())
|
||||
{
|
||||
SignalStartupFailure("OpenGL extension resolution failed.");
|
||||
return;
|
||||
}
|
||||
if (!CreateRenderTargets())
|
||||
{
|
||||
SignalStartupFailure("OpenGL render target creation failed.");
|
||||
return;
|
||||
}
|
||||
CreatePbos();
|
||||
SignalStarted();
|
||||
|
||||
auto nextRenderTime = Clock::now();
|
||||
while (!mStopping)
|
||||
{
|
||||
ConsumeCompletedPbos();
|
||||
|
||||
const auto now = Clock::now();
|
||||
if (now < nextRenderTime)
|
||||
{
|
||||
std::this_thread::sleep_for((std::min)(std::chrono::milliseconds(1), std::chrono::duration_cast<std::chrono::milliseconds>(nextRenderTime - now)));
|
||||
continue;
|
||||
}
|
||||
|
||||
RenderPattern(mFrameIndex);
|
||||
if (!QueueReadback(mFrameIndex))
|
||||
mFrameStore.CountPboQueueMiss();
|
||||
mFrameStore.CountRenderedFrame();
|
||||
++mFrameIndex;
|
||||
nextRenderTime += mFrameDuration;
|
||||
if (Clock::now() - nextRenderTime > mFrameDuration * 4)
|
||||
nextRenderTime = Clock::now() + mFrameDuration;
|
||||
}
|
||||
|
||||
FlushPbos();
|
||||
DestroyPbos();
|
||||
DestroyRenderTargets();
|
||||
context.ClearCurrent();
|
||||
}
|
||||
|
||||
bool CreateRenderTargets()
|
||||
{
|
||||
glGenFramebuffers(1, &mFramebuffer);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, mFramebuffer);
|
||||
glGenTextures(1, &mTexture);
|
||||
glBindTexture(GL_TEXTURE_2D, mTexture);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, static_cast<GLsizei>(mWidth), static_cast<GLsizei>(mHeight), 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr);
|
||||
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mTexture, 0);
|
||||
const bool complete = glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE;
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, 0);
|
||||
return complete;
|
||||
}
|
||||
|
||||
void DestroyRenderTargets()
|
||||
{
|
||||
if (mFramebuffer != 0)
|
||||
glDeleteFramebuffers(1, &mFramebuffer);
|
||||
if (mTexture != 0)
|
||||
glDeleteTextures(1, &mTexture);
|
||||
mFramebuffer = 0;
|
||||
mTexture = 0;
|
||||
}
|
||||
|
||||
void CreatePbos()
|
||||
{
|
||||
mPbos.resize(kPboDepth);
|
||||
const std::size_t byteCount = static_cast<std::size_t>(VideoIORowBytes(VideoIOPixelFormat::Bgra8, mWidth)) * mHeight;
|
||||
for (PboSlot& slot : mPbos)
|
||||
{
|
||||
glGenBuffers(1, &slot.pbo);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pbo);
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(byteCount), nullptr, GL_STREAM_READ);
|
||||
}
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
}
|
||||
|
||||
void DestroyPbos()
|
||||
{
|
||||
for (PboSlot& slot : mPbos)
|
||||
{
|
||||
if (slot.fence)
|
||||
glDeleteSync(slot.fence);
|
||||
if (slot.pbo != 0)
|
||||
glDeleteBuffers(1, &slot.pbo);
|
||||
slot = {};
|
||||
}
|
||||
mPbos.clear();
|
||||
}
|
||||
|
||||
void FlushPbos()
|
||||
{
|
||||
for (std::size_t i = 0; i < mPbos.size() * 2; ++i)
|
||||
ConsumeCompletedPbos();
|
||||
}
|
||||
|
||||
void RenderPattern(uint64_t frameIndex)
|
||||
{
|
||||
const float t = static_cast<float>(frameIndex) / 60.0f;
|
||||
const float red = 0.1f + 0.4f * (0.5f + 0.5f * std::sin(t));
|
||||
const float green = 0.1f + 0.4f * (0.5f + 0.5f * std::sin(t * 0.73f + 1.0f));
|
||||
const float blue = 0.15f + 0.3f * (0.5f + 0.5f * std::sin(t * 0.41f + 2.0f));
|
||||
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, mFramebuffer);
|
||||
glViewport(0, 0, static_cast<GLsizei>(mWidth), static_cast<GLsizei>(mHeight));
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
glClearColor(red, green, blue, 1.0f);
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
|
||||
const int boxWidth = static_cast<int>(mWidth / 6);
|
||||
const int boxHeight = static_cast<int>(mHeight / 5);
|
||||
const float phase = 0.5f + 0.5f * std::sin(t * 1.7f);
|
||||
const int x = static_cast<int>(phase * static_cast<float>(mWidth - boxWidth));
|
||||
const int y = static_cast<int>((0.5f + 0.5f * std::sin(t * 1.1f + 0.8f)) * static_cast<float>(mHeight - boxHeight));
|
||||
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
glScissor(x, y, boxWidth, boxHeight);
|
||||
glClearColor(1.0f - red, 0.85f, 0.15f + blue, 1.0f);
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
}
|
||||
|
||||
bool QueueReadback(uint64_t frameIndex)
|
||||
{
|
||||
if (mPbos.empty())
|
||||
return false;
|
||||
|
||||
PboSlot& slot = mPbos[mWriteIndex];
|
||||
if (slot.inFlight)
|
||||
return false;
|
||||
|
||||
const std::size_t byteCount = static_cast<std::size_t>(VideoIORowBytes(VideoIOPixelFormat::Bgra8, mWidth)) * mHeight;
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, mFramebuffer);
|
||||
glPixelStorei(GL_PACK_ALIGNMENT, 4);
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pbo);
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(byteCount), nullptr, GL_STREAM_READ);
|
||||
glReadPixels(0, 0, static_cast<GLsizei>(mWidth), static_cast<GLsizei>(mHeight), GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr);
|
||||
slot.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
slot.inFlight = slot.fence != nullptr;
|
||||
slot.frameIndex = frameIndex;
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
mWriteIndex = (mWriteIndex + 1) % mPbos.size();
|
||||
return slot.inFlight;
|
||||
}
|
||||
|
||||
void ConsumeCompletedPbos()
|
||||
{
|
||||
for (std::size_t checked = 0; checked < mPbos.size(); ++checked)
|
||||
{
|
||||
PboSlot& slot = mPbos[mReadIndex];
|
||||
if (!slot.inFlight || slot.fence == nullptr)
|
||||
{
|
||||
mReadIndex = (mReadIndex + 1) % mPbos.size();
|
||||
continue;
|
||||
}
|
||||
|
||||
const GLenum waitResult = glClientWaitSync(slot.fence, 0, 0);
|
||||
if (waitResult != GL_ALREADY_SIGNALED && waitResult != GL_CONDITION_SATISFIED)
|
||||
return;
|
||||
|
||||
ProbeFrame frame;
|
||||
if (mFrameStore.AcquireForRender(frame))
|
||||
{
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pbo);
|
||||
void* mapped = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
|
||||
if (mapped)
|
||||
{
|
||||
const std::size_t byteCount = static_cast<std::size_t>(frame.rowBytes) * frame.height;
|
||||
std::memcpy(frame.bytes, mapped, byteCount);
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||
frame.frameIndex = slot.frameIndex;
|
||||
mFrameStore.PublishCompleted(frame);
|
||||
}
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
}
|
||||
|
||||
glDeleteSync(slot.fence);
|
||||
slot.fence = nullptr;
|
||||
slot.inFlight = false;
|
||||
mReadIndex = (mReadIndex + 1) % mPbos.size();
|
||||
}
|
||||
}
|
||||
|
||||
void SignalStarted()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mStartupMutex);
|
||||
mStarted = true;
|
||||
mStartupCondition.notify_all();
|
||||
}
|
||||
|
||||
void SignalStartupFailure(const std::string& error)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mStartupMutex);
|
||||
mStartupError = error;
|
||||
mStartupCondition.notify_all();
|
||||
}
|
||||
|
||||
LatestFrameStore& mFrameStore;
|
||||
unsigned mWidth = 0;
|
||||
unsigned mHeight = 0;
|
||||
Clock::duration mFrameDuration;
|
||||
std::thread mThread;
|
||||
std::atomic<bool> mStopping{ false };
|
||||
std::mutex mStartupMutex;
|
||||
std::condition_variable mStartupCondition;
|
||||
bool mStarted = false;
|
||||
std::string mStartupError;
|
||||
GLuint mFramebuffer = 0;
|
||||
GLuint mTexture = 0;
|
||||
std::vector<PboSlot> mPbos;
|
||||
std::size_t mWriteIndex = 0;
|
||||
std::size_t mReadIndex = 0;
|
||||
uint64_t mFrameIndex = 0;
|
||||
};
|
||||
|
||||
class DeckLinkProbePlayout
|
||||
{
|
||||
public:
|
||||
DeckLinkProbePlayout(DeckLinkSession& session, LatestFrameStore& frameStore) :
|
||||
mSession(session),
|
||||
mFrameStore(frameStore)
|
||||
{
|
||||
}
|
||||
|
||||
bool Start()
|
||||
{
|
||||
mStopping = false;
|
||||
mThread = std::thread([this]() { ThreadMain(); });
|
||||
return true;
|
||||
}
|
||||
|
||||
void Stop()
|
||||
{
|
||||
mStopping = true;
|
||||
if (mThread.joinable())
|
||||
mThread.join();
|
||||
}
|
||||
|
||||
void ThreadMain()
|
||||
{
|
||||
while (!mStopping)
|
||||
{
|
||||
const ProbeMetrics metrics = mFrameStore.Metrics();
|
||||
if (metrics.scheduledCount >= kDeckLinkTargetBufferedFrames)
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
continue;
|
||||
}
|
||||
|
||||
ProbeFrame frame;
|
||||
if (!mFrameStore.ConsumeCompleted(frame))
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
continue;
|
||||
}
|
||||
|
||||
VideoIOOutputFrame outputFrame;
|
||||
outputFrame.bytes = frame.bytes;
|
||||
outputFrame.nativeBuffer = frame.bytes;
|
||||
outputFrame.rowBytes = frame.rowBytes;
|
||||
outputFrame.width = frame.width;
|
||||
outputFrame.height = frame.height;
|
||||
outputFrame.pixelFormat = frame.pixelFormat;
|
||||
|
||||
if (!mSession.ScheduleOutputFrame(outputFrame))
|
||||
{
|
||||
mFrameStore.ReleaseByBytes(frame.bytes);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
DeckLinkSession& mSession;
|
||||
LatestFrameStore& mFrameStore;
|
||||
std::thread mThread;
|
||||
std::atomic<bool> mStopping{ false };
|
||||
};
|
||||
|
||||
std::string CompletionResultToString(VideoIOCompletionResult result)
|
||||
{
|
||||
switch (result)
|
||||
{
|
||||
case VideoIOCompletionResult::Completed:
|
||||
return "completed";
|
||||
case VideoIOCompletionResult::DisplayedLate:
|
||||
return "late";
|
||||
case VideoIOCompletionResult::Dropped:
|
||||
return "dropped";
|
||||
case VideoIOCompletionResult::Flushed:
|
||||
return "flushed";
|
||||
case VideoIOCompletionResult::Unknown:
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
void PrintUsage()
|
||||
{
|
||||
std::cout << "DeckLinkRenderCadenceProbe\n"
|
||||
<< " Renders a simple OpenGL BGRA8 motion pattern on one GL thread,\n"
|
||||
<< " copies completed PBO readbacks into latest-N system memory slots,\n"
|
||||
<< " warms up rendered frames, then feeds DeckLink scheduled playback.\n\n"
|
||||
<< "Press Enter to stop.\n";
|
||||
}
|
||||
|
||||
class ComInitGuard
|
||||
{
|
||||
public:
|
||||
~ComInitGuard()
|
||||
{
|
||||
if (mInitialized)
|
||||
CoUninitialize();
|
||||
}
|
||||
|
||||
bool Initialize()
|
||||
{
|
||||
const HRESULT result = CoInitialize(nullptr);
|
||||
mInitialized = SUCCEEDED(result);
|
||||
mResult = result;
|
||||
return mInitialized;
|
||||
}
|
||||
|
||||
HRESULT Result() const { return mResult; }
|
||||
|
||||
private:
|
||||
bool mInitialized = false;
|
||||
HRESULT mResult = S_OK;
|
||||
};
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
PrintUsage();
|
||||
|
||||
ComInitGuard com;
|
||||
if (!com.Initialize())
|
||||
{
|
||||
std::cerr << "COM initialization failed: 0x" << std::hex << com.Result() << std::dec << "\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
LatestFrameStore frameStore(kDefaultWidth, kDefaultHeight, kSystemFrameSlots);
|
||||
DeckLinkSession deckLink;
|
||||
std::atomic<uint64_t> completions{ 0 };
|
||||
std::atomic<uint64_t> late{ 0 };
|
||||
std::atomic<uint64_t> dropped{ 0 };
|
||||
|
||||
VideoFormatSelection formats;
|
||||
std::string error;
|
||||
if (!deckLink.DiscoverDevicesAndModes(formats, error))
|
||||
{
|
||||
std::cerr << "DeckLink discovery failed: " << error << "\n";
|
||||
return 1;
|
||||
}
|
||||
if (!deckLink.SelectPreferredFormats(formats, false, error))
|
||||
{
|
||||
std::cerr << "DeckLink format selection failed: " << error << "\n";
|
||||
return 1;
|
||||
}
|
||||
if (!deckLink.ConfigureOutput(
|
||||
[&](const VideoIOCompletion& completion) {
|
||||
frameStore.ReleaseByBytes(completion.outputFrameBuffer);
|
||||
++completions;
|
||||
if (completion.result == VideoIOCompletionResult::DisplayedLate)
|
||||
++late;
|
||||
else if (completion.result == VideoIOCompletionResult::Dropped)
|
||||
++dropped;
|
||||
},
|
||||
formats.output,
|
||||
false,
|
||||
error))
|
||||
{
|
||||
std::cerr << "DeckLink output configuration failed: " << error << "\n";
|
||||
return 1;
|
||||
}
|
||||
if (!deckLink.PrepareOutputSchedule())
|
||||
{
|
||||
std::cerr << "DeckLink schedule preparation failed.\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
const VideoIOState& state = deckLink.State();
|
||||
if (state.outputFrameSize.width != kDefaultWidth || state.outputFrameSize.height != kDefaultHeight)
|
||||
{
|
||||
std::cerr << "This probe currently expects 1920x1080 output. Selected mode is "
|
||||
<< state.outputFrameSize.width << "x" << state.outputFrameSize.height << ".\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
RenderCadenceProbe renderer(frameStore, state.outputFrameSize.width, state.outputFrameSize.height, state.frameBudgetMilliseconds);
|
||||
if (!renderer.Start(error))
|
||||
{
|
||||
std::cerr << "Render thread start failed: " << error << "\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::cout << "Warming up " << kWarmupFrames << " rendered frames at cadence...\n";
|
||||
if (!frameStore.WaitForCompletedDepth(kWarmupFrames, std::chrono::seconds(3)))
|
||||
{
|
||||
std::cerr << "Timed out waiting for rendered warmup frames.\n";
|
||||
renderer.Stop();
|
||||
return 1;
|
||||
}
|
||||
|
||||
DeckLinkProbePlayout playout(deckLink, frameStore);
|
||||
playout.Start();
|
||||
|
||||
const auto prerollDeadline = std::chrono::steady_clock::now() + std::chrono::seconds(3);
|
||||
while (std::chrono::steady_clock::now() < prerollDeadline)
|
||||
{
|
||||
if (frameStore.Metrics().scheduledCount >= kDeckLinkTargetBufferedFrames)
|
||||
break;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(2));
|
||||
}
|
||||
|
||||
if (!deckLink.StartScheduledPlayback())
|
||||
{
|
||||
std::cerr << "DeckLink scheduled playback failed to start.\n";
|
||||
playout.Stop();
|
||||
renderer.Stop();
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::atomic<bool> metricsStopping{ false };
|
||||
std::thread metricsThread([&]() {
|
||||
uint64_t lastRendered = 0;
|
||||
uint64_t lastScheduled = 0;
|
||||
auto lastTime = std::chrono::steady_clock::now();
|
||||
while (!metricsStopping)
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
const auto now = std::chrono::steady_clock::now();
|
||||
const double seconds = std::chrono::duration_cast<std::chrono::duration<double>>(now - lastTime).count();
|
||||
const ProbeMetrics metrics = frameStore.Metrics();
|
||||
const double renderFps = seconds > 0.0 ? static_cast<double>(metrics.renderedFrames - lastRendered) / seconds : 0.0;
|
||||
const double scheduleFps = seconds > 0.0 ? static_cast<double>(metrics.scheduledFrames - lastScheduled) / seconds : 0.0;
|
||||
lastRendered = metrics.renderedFrames;
|
||||
lastScheduled = metrics.scheduledFrames;
|
||||
lastTime = now;
|
||||
|
||||
std::cout << std::fixed << std::setprecision(1)
|
||||
<< "renderFps=" << renderFps
|
||||
<< " scheduleFps=" << scheduleFps
|
||||
<< " free=" << metrics.freeCount
|
||||
<< " completed=" << metrics.completedCount
|
||||
<< " scheduled=" << metrics.scheduledCount
|
||||
<< " drops=" << metrics.completedDrops
|
||||
<< " pboMiss=" << metrics.pboQueueMisses
|
||||
<< " completions=" << completions.load()
|
||||
<< " late=" << late.load()
|
||||
<< " dropped=" << dropped.load()
|
||||
<< " decklinkBuffered=" << deckLink.State().actualDeckLinkBufferedFrames
|
||||
<< "\n";
|
||||
}
|
||||
});
|
||||
|
||||
std::string line;
|
||||
std::getline(std::cin, line);
|
||||
|
||||
metricsStopping = true;
|
||||
if (metricsThread.joinable())
|
||||
metricsThread.join();
|
||||
playout.Stop();
|
||||
deckLink.Stop();
|
||||
renderer.Stop();
|
||||
deckLink.ReleaseResources();
|
||||
return 0;
|
||||
}
|
||||
113
apps/DeckLinkRenderCadenceProbe/README.md
Normal file
113
apps/DeckLinkRenderCadenceProbe/README.md
Normal file
@@ -0,0 +1,113 @@
|
||||
# DeckLink Render Cadence Probe
|
||||
|
||||
This is a deliberately small architecture probe for the Phase 7.7 playout model.
|
||||
|
||||
It is not the main app and does not use the main runtime, shader stack, preview path, input upload path, or render engine.
|
||||
|
||||
## What It Tests
|
||||
|
||||
The probe validates the clean playout spine:
|
||||
|
||||
```text
|
||||
single OpenGL render thread
|
||||
owns its own hidden GL context
|
||||
renders a simple moving BGRA8 pattern at output cadence
|
||||
queues GPU readback through a PBO ring
|
||||
copies completed readbacks into latest-N system-memory slots
|
||||
|
||||
system-memory frame store
|
||||
owns free / rendering / completed / scheduled slots
|
||||
drops old completed unscheduled frames when render cadence needs space
|
||||
protects scheduled frames until DeckLink completion
|
||||
|
||||
DeckLink playout thread
|
||||
consumes completed system-memory frames
|
||||
keeps a small scheduled buffer filled
|
||||
does not render
|
||||
```
|
||||
|
||||
Startup warms up rendered frames before starting DeckLink scheduled playback.
|
||||
|
||||
## How To Build
|
||||
|
||||
```powershell
|
||||
cmake --build --preset build-debug --target DeckLinkRenderCadenceProbe -- /m:1
|
||||
```
|
||||
|
||||
The executable is:
|
||||
|
||||
```text
|
||||
build\vs2022-x64-debug\Debug\DeckLinkRenderCadenceProbe.exe
|
||||
```
|
||||
|
||||
## How To Run
|
||||
|
||||
Run it from a terminal so you can see the telemetry:
|
||||
|
||||
```powershell
|
||||
build\vs2022-x64-debug\Debug\DeckLinkRenderCadenceProbe.exe
|
||||
```
|
||||
|
||||
Press Enter to stop.
|
||||
|
||||
The first version assumes `1080p59.94` / `1920x1080` output and BGRA8 system-memory frames.
|
||||
|
||||
## What To Watch
|
||||
|
||||
The probe prints one line per second:
|
||||
|
||||
- `renderFps`: cadence render throughput
|
||||
- `scheduleFps`: DeckLink scheduling throughput
|
||||
- `free`: free system-memory slots
|
||||
- `completed`: rendered, unscheduled slots
|
||||
- `scheduled`: slots currently owned by DeckLink
|
||||
- `drops`: old completed unscheduled frames recycled by the latest-N cache
|
||||
- `pboMiss`: PBO ring was full when trying to queue readback
|
||||
- `late`: DeckLink displayed-late completions
|
||||
- `dropped`: DeckLink dropped completions
|
||||
- `decklinkBuffered`: actual DeckLink buffered-frame count when available
|
||||
|
||||
For a healthy architecture proof, expect:
|
||||
|
||||
- `renderFps` close to the selected output cadence
|
||||
- `scheduleFps` close to the selected output cadence after warmup
|
||||
- `scheduled` hovering near the target buffer depth
|
||||
- `late` and `dropped` not increasing continuously
|
||||
- visible motion that is smooth on the DeckLink output
|
||||
|
||||
## Interpretation
|
||||
|
||||
If this probe is smooth at 59.94/60, the broad architecture is viable and the main app's remaining stutters are likely caused by integration details such as input upload, shared render-thread work, preview/screenshot work, or runtime/render-state coupling.
|
||||
|
||||
If this probe is not smooth, the problem is lower level: DeckLink scheduling, OpenGL readback, Windows scheduling, or hardware/driver behavior.
|
||||
|
||||
## Initial Result
|
||||
|
||||
Date: 2026-05-12
|
||||
|
||||
User-visible result:
|
||||
|
||||
- output looked smooth
|
||||
|
||||
Representative telemetry:
|
||||
|
||||
```text
|
||||
renderFps=59.9 scheduleFps=59.9 free=7 completed=1 scheduled=4 drops=0 pboMiss=0 completions=119 late=0 dropped=0 decklinkBuffered=4
|
||||
renderFps=59.9 scheduleFps=59.9 free=7 completed=1 scheduled=4 drops=0 pboMiss=0 completions=179 late=0 dropped=0 decklinkBuffered=4
|
||||
renderFps=59.8 scheduleFps=59.8 free=7 completed=1 scheduled=4 drops=0 pboMiss=0 completions=239 late=0 dropped=0 decklinkBuffered=4
|
||||
renderFps=60.8 scheduleFps=59.8 free=7 completed=1 scheduled=4 drops=0 pboMiss=0 completions=299 late=0 dropped=0 decklinkBuffered=4
|
||||
renderFps=59.9 scheduleFps=59.9 free=7 completed=1 scheduled=4 drops=0 pboMiss=0 completions=360 late=0 dropped=0 decklinkBuffered=4
|
||||
renderFps=59.8 scheduleFps=60.8 free=8 completed=0 scheduled=4 drops=0 pboMiss=0 completions=420 late=0 dropped=0 decklinkBuffered=4
|
||||
```
|
||||
|
||||
Read:
|
||||
|
||||
- the clean architecture can sustain the selected output cadence on the test machine
|
||||
- BGRA8 PBO readback is viable when isolated from the main app's other render-thread work
|
||||
- latest-N system-memory buffering stayed stable
|
||||
- DeckLink actual buffered depth stayed at 4
|
||||
- there were no late frames, dropped frames, completed-frame drops, or PBO misses in the sampled output
|
||||
|
||||
Implication:
|
||||
|
||||
The main app's remaining stutters are likely integration/ownership issues rather than a fundamental DeckLink/OpenGL/BGRA8 readback limit. The highest-value suspects are input upload before output render, shared render-thread queue contention, preview/screenshot work, and runtime/render-state work on the output path.
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
#include <windows.h>
|
||||
@@ -19,7 +20,8 @@ VideoBackend::VideoBackend(RenderEngine& renderEngine, HealthTelemetry& healthTe
|
||||
mOutputProductionController(mPlayoutPolicy),
|
||||
mReadyOutputQueue(mPlayoutPolicy),
|
||||
mVideoIODevice(std::make_unique<DeckLinkSession>()),
|
||||
mBridge(std::make_unique<OpenGLVideoIOBridge>(renderEngine))
|
||||
mBridge(std::make_unique<OpenGLVideoIOBridge>(renderEngine)),
|
||||
mInputCaptureDisabled(IsEnvironmentFlagEnabled("VST_DISABLE_INPUT_CAPTURE"))
|
||||
{
|
||||
}
|
||||
|
||||
@@ -69,6 +71,12 @@ bool VideoBackend::ConfigureInput(const VideoFormat& inputVideoMode, std::string
|
||||
{
|
||||
if (mLifecycle.State() != VideoBackendLifecycleState::Configuring)
|
||||
ApplyLifecycleTransition(VideoBackendLifecycleState::Configuring, "Configuring video backend input.");
|
||||
if (mInputCaptureDisabled)
|
||||
{
|
||||
MutableState().hasInputSource = false;
|
||||
MutableState().statusMessage = "DeckLink input capture disabled by VST_DISABLE_INPUT_CAPTURE for output timing isolation.";
|
||||
return true;
|
||||
}
|
||||
if (!mVideoIODevice->ConfigureInput(
|
||||
[this](const VideoIOFrame& frame) { HandleInputFrame(frame); },
|
||||
inputVideoMode,
|
||||
@@ -110,19 +118,42 @@ bool VideoBackend::ConfigureOutput(const VideoFormat& outputVideoMode, bool exte
|
||||
bool VideoBackend::Start()
|
||||
{
|
||||
ApplyLifecycleTransition(VideoBackendLifecycleState::Prerolling, "Video backend preroll starting.");
|
||||
if (!mVideoIODevice->PrepareOutputSchedule())
|
||||
{
|
||||
ApplyLifecycleFailure(StatusMessage().empty() ? "Video backend output schedule preparation failed." : StatusMessage());
|
||||
return false;
|
||||
}
|
||||
|
||||
StartOutputCompletionWorker();
|
||||
const bool started = mVideoIODevice->Start();
|
||||
if (started)
|
||||
{
|
||||
StartOutputProducerWorker();
|
||||
ApplyLifecycleTransition(VideoBackendLifecycleState::Running, "Video backend started.");
|
||||
}
|
||||
else
|
||||
StartOutputProducerWorker();
|
||||
|
||||
if (!WarmupOutputPreroll())
|
||||
{
|
||||
StopOutputProducerWorker();
|
||||
StopOutputCompletionWorker();
|
||||
ApplyLifecycleFailure(StatusMessage().empty() ? "Video backend start failed." : StatusMessage());
|
||||
ApplyLifecycleFailure(StatusMessage().empty() ? "Video backend preroll warmup failed." : StatusMessage());
|
||||
return false;
|
||||
}
|
||||
return started;
|
||||
|
||||
if (!mInputCaptureDisabled && !mVideoIODevice->StartInputStreams())
|
||||
{
|
||||
StopOutputProducerWorker();
|
||||
StopOutputCompletionWorker();
|
||||
ApplyLifecycleFailure(StatusMessage().empty() ? "Video backend input stream start failed." : StatusMessage());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!mVideoIODevice->StartScheduledPlayback())
|
||||
{
|
||||
StopOutputProducerWorker();
|
||||
mVideoIODevice->Stop();
|
||||
StopOutputCompletionWorker();
|
||||
ApplyLifecycleFailure(StatusMessage().empty() ? "Video backend scheduled playback start failed." : StatusMessage());
|
||||
return false;
|
||||
}
|
||||
|
||||
ApplyLifecycleTransition(VideoBackendLifecycleState::Running, "Video backend started.");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool VideoBackend::Stop()
|
||||
@@ -175,6 +206,8 @@ bool VideoBackend::HasInputDevice() const
|
||||
|
||||
bool VideoBackend::HasInputSource() const
|
||||
{
|
||||
if (mInputCaptureDisabled)
|
||||
return false;
|
||||
return mVideoIODevice->HasInputSource();
|
||||
}
|
||||
|
||||
@@ -288,6 +321,9 @@ void VideoBackend::ReportNoInputDeviceSignalStatus()
|
||||
|
||||
void VideoBackend::HandleInputFrame(const VideoIOFrame& frame)
|
||||
{
|
||||
if (mInputCaptureDisabled)
|
||||
return;
|
||||
|
||||
const VideoIOState& state = mVideoIODevice->State();
|
||||
mHealthTelemetry.TryReportSignalStatus(!frame.hasNoInputSource, state.inputFrameSize.width, state.inputFrameSize.height, state.inputDisplayModeName);
|
||||
PublishInputSignalChanged(frame, state);
|
||||
@@ -393,6 +429,39 @@ void VideoBackend::NotifyOutputProducer()
|
||||
mOutputProducerCondition.notify_one();
|
||||
}
|
||||
|
||||
bool VideoBackend::WarmupOutputPreroll()
|
||||
{
|
||||
const VideoPlayoutPolicy policy = NormalizeVideoPlayoutPolicy(mPlayoutPolicy);
|
||||
const std::size_t targetPrerollFrames = static_cast<std::size_t>(policy.targetPrerollFrames);
|
||||
if (targetPrerollFrames == 0)
|
||||
return true;
|
||||
|
||||
const double frameBudgetMilliseconds = State().frameBudgetMilliseconds > 0.0 ? State().frameBudgetMilliseconds : 16.0;
|
||||
const auto estimatedCadenceTime = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
std::chrono::duration<double, std::milli>(frameBudgetMilliseconds * static_cast<double>(targetPrerollFrames + 2)));
|
||||
const auto timeout = (std::max)(std::chrono::milliseconds(1000), estimatedCadenceTime + std::chrono::milliseconds(500));
|
||||
const auto deadline = std::chrono::steady_clock::now() + timeout;
|
||||
|
||||
while (std::chrono::steady_clock::now() < deadline)
|
||||
{
|
||||
ScheduleReadyOutputFramesToTarget();
|
||||
const SystemOutputFramePoolMetrics metrics = mSystemOutputFramePool.GetMetrics();
|
||||
RecordSystemMemoryPlayoutStats();
|
||||
if (metrics.scheduledCount >= targetPrerollFrames)
|
||||
return true;
|
||||
|
||||
NotifyOutputProducer();
|
||||
const auto waitDuration = (std::min)(OutputProducerWakeInterval(), std::chrono::milliseconds(5));
|
||||
std::unique_lock<std::mutex> lock(mOutputProducerMutex);
|
||||
mOutputProducerCondition.wait_for(lock, waitDuration);
|
||||
if (mOutputProducerWorkerStopping)
|
||||
return false;
|
||||
}
|
||||
|
||||
SetStatusMessage("Timed out warming up DeckLink preroll from rendered system-memory frames.");
|
||||
return false;
|
||||
}
|
||||
|
||||
void VideoBackend::OutputCompletionWorkerMain()
|
||||
{
|
||||
for (;;)
|
||||
@@ -1009,3 +1078,18 @@ std::string VideoBackend::PixelFormatName(VideoIOPixelFormat pixelFormat)
|
||||
{
|
||||
return std::string(VideoIOPixelFormatName(pixelFormat));
|
||||
}
|
||||
|
||||
bool VideoBackend::IsEnvironmentFlagEnabled(const char* name)
|
||||
{
|
||||
if (name == nullptr || name[0] == '\0')
|
||||
return false;
|
||||
|
||||
char* value = nullptr;
|
||||
std::size_t valueSize = 0;
|
||||
if (_dupenv_s(&value, &valueSize, name) != 0 || value == nullptr)
|
||||
return false;
|
||||
|
||||
const std::string flag(value);
|
||||
std::free(value);
|
||||
return flag == "1" || flag == "true" || flag == "TRUE" || flag == "yes" || flag == "on";
|
||||
}
|
||||
|
||||
@@ -77,6 +77,7 @@ private:
|
||||
void StopOutputProducerWorker();
|
||||
void OutputProducerWorkerMain();
|
||||
void NotifyOutputProducer();
|
||||
bool WarmupOutputPreroll();
|
||||
std::chrono::milliseconds OutputProducerWakeInterval() const;
|
||||
void ProcessOutputFrameCompletion(const VideoIOCompletion& completion);
|
||||
std::size_t ProduceReadyOutputFrames(const VideoIOCompletion& completion, std::size_t maxFrames);
|
||||
@@ -100,6 +101,7 @@ private:
|
||||
void PublishTimingSample(const std::string& subsystem, const std::string& metric, double value, const std::string& unit);
|
||||
static std::string CompletionResultName(VideoIOCompletionResult result);
|
||||
static std::string PixelFormatName(VideoIOPixelFormat pixelFormat);
|
||||
static bool IsEnvironmentFlagEnabled(const char* name);
|
||||
|
||||
HealthTelemetry& mHealthTelemetry;
|
||||
RuntimeEventDispatcher& mRuntimeEventDispatcher;
|
||||
@@ -127,6 +129,7 @@ private:
|
||||
bool mOutputCompletionWorkerStopping = false;
|
||||
bool mOutputProducerWorkerRunning = false;
|
||||
bool mOutputProducerWorkerStopping = false;
|
||||
bool mInputCaptureDisabled = false;
|
||||
uint64_t mNextReadyOutputFrameIndex = 0;
|
||||
uint64_t mInputFrameIndex = 0;
|
||||
uint64_t mOutputFrameScheduleIndex = 0;
|
||||
|
||||
@@ -116,6 +116,9 @@ public:
|
||||
virtual bool SelectPreferredFormats(const VideoFormatSelection& videoModes, bool outputAlphaRequired, std::string& error) = 0;
|
||||
virtual bool ConfigureInput(InputFrameCallback callback, const VideoFormat& inputVideoMode, std::string& error) = 0;
|
||||
virtual bool ConfigureOutput(OutputFrameCallback callback, const VideoFormat& outputVideoMode, bool externalKeyingEnabled, std::string& error) = 0;
|
||||
virtual bool PrepareOutputSchedule() = 0;
|
||||
virtual bool StartInputStreams() = 0;
|
||||
virtual bool StartScheduledPlayback() = 0;
|
||||
virtual bool Start() = 0;
|
||||
virtual bool Stop() = 0;
|
||||
virtual const VideoIOState& State() const = 0;
|
||||
|
||||
@@ -660,9 +660,45 @@ bool DeckLinkSession::ScheduleOutputFrame(const VideoIOOutputFrame& frame)
|
||||
return scheduled;
|
||||
}
|
||||
|
||||
bool DeckLinkSession::Start()
|
||||
bool DeckLinkSession::PrepareOutputSchedule()
|
||||
{
|
||||
mScheduler.Reset();
|
||||
RefreshBufferedVideoFrameCount();
|
||||
return output != nullptr;
|
||||
}
|
||||
|
||||
bool DeckLinkSession::StartInputStreams()
|
||||
{
|
||||
if (!input)
|
||||
return true;
|
||||
|
||||
if (input->StartStreams() != S_OK)
|
||||
{
|
||||
MessageBoxA(NULL, "Could not start the DeckLink input stream.", "DeckLink start failed", MB_OK | MB_ICONERROR);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DeckLinkSession::StartScheduledPlayback()
|
||||
{
|
||||
if (!output)
|
||||
{
|
||||
MessageBoxA(NULL, "Cannot start playout because no DeckLink output device is available.", "DeckLink start failed", MB_OK | MB_ICONERROR);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (output->StartScheduledPlayback(0, mScheduler.TimeScale(), 1.0) != S_OK)
|
||||
{
|
||||
MessageBoxA(NULL, "Could not start DeckLink scheduled playback.", "DeckLink start failed", MB_OK | MB_ICONERROR);
|
||||
return false;
|
||||
}
|
||||
RefreshBufferedVideoFrameCount();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DeckLinkSession::Start()
|
||||
{
|
||||
if (!output)
|
||||
{
|
||||
MessageBoxA(NULL, "Cannot start playout because no DeckLink output device is available.", "DeckLink start failed", MB_OK | MB_ICONERROR);
|
||||
@@ -676,6 +712,9 @@ bool DeckLinkSession::Start()
|
||||
|
||||
const VideoPlayoutPolicy policy = NormalizeVideoPlayoutPolicy(mPlayoutPolicy);
|
||||
mPlayoutPolicy = policy;
|
||||
if (!PrepareOutputSchedule())
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < policy.targetPrerollFrames; i++)
|
||||
{
|
||||
CComPtr<IDeckLinkMutableVideoFrame> outputVideoFrame;
|
||||
@@ -691,21 +730,7 @@ bool DeckLinkSession::Start()
|
||||
}
|
||||
}
|
||||
|
||||
if (input)
|
||||
{
|
||||
if (input->StartStreams() != S_OK)
|
||||
{
|
||||
MessageBoxA(NULL, "Could not start the DeckLink input stream.", "DeckLink start failed", MB_OK | MB_ICONERROR);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (output->StartScheduledPlayback(0, mScheduler.TimeScale(), 1.0) != S_OK)
|
||||
{
|
||||
MessageBoxA(NULL, "Could not start DeckLink scheduled playback.", "DeckLink start failed", MB_OK | MB_ICONERROR);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return StartInputStreams() && StartScheduledPlayback();
|
||||
}
|
||||
|
||||
bool DeckLinkSession::Stop()
|
||||
|
||||
@@ -28,6 +28,9 @@ public:
|
||||
bool SelectPreferredFormats(const VideoFormatSelection& videoModes, bool outputAlphaRequired, std::string& error) override;
|
||||
bool ConfigureInput(InputFrameCallback callback, const VideoFormat& inputVideoMode, std::string& error) override;
|
||||
bool ConfigureOutput(OutputFrameCallback callback, const VideoFormat& outputVideoMode, bool externalKeyingEnabled, std::string& error) override;
|
||||
bool PrepareOutputSchedule() override;
|
||||
bool StartInputStreams() override;
|
||||
bool StartScheduledPlayback() override;
|
||||
bool Start() override;
|
||||
bool Stop() override;
|
||||
|
||||
|
||||
529
docs/CURRENT_SYSTEM_ARCHITECTURE.md
Normal file
529
docs/CURRENT_SYSTEM_ARCHITECTURE.md
Normal file
@@ -0,0 +1,529 @@
|
||||
# Current System Architecture
|
||||
|
||||
This document describes how the application currently works.
|
||||
|
||||
It replaces the phase-by-phase design trail as the best entry point for understanding the repo. The older phase documents remain useful history, but they mix implementation notes, experiments, and target designs. This document is organized by current runtime behavior and subsystem ownership instead.
|
||||
|
||||
## Application Shape
|
||||
|
||||
The app is a live OpenGL compositor with DeckLink input/output, runtime control services, persistent layer-stack state, live state overlays, health telemetry, and a small internal event model.
|
||||
|
||||
At runtime the major subsystems are:
|
||||
|
||||
- `OpenGLComposite`
|
||||
- `RuntimeStore`
|
||||
- `RuntimeCoordinator`
|
||||
- `RuntimeSnapshotProvider`
|
||||
- `RuntimeServices`
|
||||
- `RuntimeUpdateController`
|
||||
- `RenderEngine`
|
||||
- `VideoBackend`
|
||||
- `DeckLinkSession`
|
||||
- `HealthTelemetry`
|
||||
- `RuntimeEventDispatcher`
|
||||
- `PersistenceWriter`
|
||||
|
||||
The key architectural rule is:
|
||||
|
||||
- runtime/control subsystems decide what state should exist
|
||||
- render subsystems decide how to draw that state
|
||||
- video subsystems decide how frames move to and from hardware
|
||||
- telemetry observes behavior without becoming a control plane
|
||||
|
||||
## Process Startup
|
||||
|
||||
The Win32 app creates the window, chooses a pixel format, creates an OpenGL context, initializes COM, and constructs `OpenGLComposite`.
|
||||
|
||||
`OpenGLComposite` owns the high-level assembly of the runtime:
|
||||
|
||||
- runtime store
|
||||
- runtime coordinator
|
||||
- runtime services
|
||||
- runtime update controller
|
||||
- render engine
|
||||
- video backend
|
||||
|
||||
Startup proceeds broadly as:
|
||||
|
||||
1. COM and OpenGL are initialized by the Win32 app.
|
||||
2. `OpenGLComposite::InitDeckLink()` discovers/configures DeckLink and runtime state.
|
||||
3. Runtime services are started.
|
||||
4. Shader programs and GL resources are initialized.
|
||||
5. The render thread is started.
|
||||
6. The video backend starts output preroll and playback.
|
||||
|
||||
The normal VS Code debug launch currently sets:
|
||||
|
||||
```text
|
||||
VST_DISABLE_INPUT_CAPTURE=1
|
||||
```
|
||||
|
||||
That disables DeckLink input capture for output-timing isolation while keeping the output path active.
|
||||
|
||||
## Runtime State
|
||||
|
||||
### `RuntimeStore`
|
||||
|
||||
`RuntimeStore` owns durable runtime data and file-backed state.
|
||||
|
||||
It owns:
|
||||
|
||||
- runtime host configuration
|
||||
- stored layer stack data
|
||||
- persisted parameter values
|
||||
- stack presets
|
||||
- shader package catalog metadata
|
||||
- runtime state presentation data
|
||||
- persistence requests
|
||||
|
||||
It does not own render-thread resources, DeckLink timing, control ingress, or mutation policy.
|
||||
|
||||
### `CommittedLiveState`
|
||||
|
||||
`CommittedLiveState` owns current session/operator layer state that is live but not necessarily persisted as the durable base state.
|
||||
|
||||
It gives the renderer and snapshot builder a named read model for current committed layer state.
|
||||
|
||||
### `RuntimeCoordinator`
|
||||
|
||||
`RuntimeCoordinator` is the mutation policy boundary.
|
||||
|
||||
It validates and applies runtime mutations, classifies whether changes are persisted/committed/transient, emits persistence requests, and produces render reset/reload decisions.
|
||||
|
||||
It keeps mutation decisions out of:
|
||||
|
||||
- the render engine
|
||||
- control services
|
||||
- video backend
|
||||
- telemetry
|
||||
|
||||
### `RuntimeSnapshotProvider`
|
||||
|
||||
`RuntimeSnapshotProvider` publishes render-facing snapshots.
|
||||
|
||||
It owns the currently published render snapshot and gives the render path a stable read boundary. Rendering does not read mutable store objects directly.
|
||||
|
||||
## Live State And Layering
|
||||
|
||||
The current render state is built from named layers of state:
|
||||
|
||||
- persisted layer/package/default state from the runtime store
|
||||
- committed live/session state
|
||||
- transient live overlays from OSC/control input
|
||||
- render-local state owned by the renderer
|
||||
|
||||
`RuntimeStateLayerModel` names these categories. `RenderStateComposer` and `RuntimeLiveState` combine live values into render-facing state.
|
||||
|
||||
`RenderFrameInput` and `RenderFrameState` are the frame contract:
|
||||
|
||||
- `RenderFrameInput` describes what kind of frame is being built
|
||||
- `RenderFrameState` describes the resolved state used to draw that frame
|
||||
|
||||
The renderer should not ask global state systems which snapshot or layer state to use midway through drawing.
|
||||
|
||||
## Control And Events
|
||||
|
||||
### `RuntimeServices`
|
||||
|
||||
`RuntimeServices` owns runtime-facing services such as OSC/control integration and service lifecycle.
|
||||
|
||||
It connects control ingress to the coordinator and live-state bridge.
|
||||
|
||||
### `ControlServices`
|
||||
|
||||
`ControlServices` handles OSC/control ingress, buffering, and polling/wake behavior.
|
||||
|
||||
It does not own runtime mutation policy. It normalizes ingress and asks the coordinator/runtime services to apply changes.
|
||||
|
||||
### `RuntimeEventDispatcher`
|
||||
|
||||
The app uses typed runtime events for internal coordination and observation.
|
||||
|
||||
Events are used for:
|
||||
|
||||
- runtime state broadcast requests
|
||||
- shader build lifecycle
|
||||
- backend state changes
|
||||
- input/output frame observations
|
||||
- timing samples
|
||||
- health and queue observations
|
||||
|
||||
Events say what happened. Commands/request methods still exist where a caller needs an immediate success/failure answer.
|
||||
|
||||
## Persistence
|
||||
|
||||
Persistence is handled by `PersistenceWriter`.
|
||||
|
||||
Runtime mutations can enqueue persistence requests without blocking the render/output path. Shutdown performs a bounded persistence flush.
|
||||
|
||||
The store owns durable state; the writer owns background write execution.
|
||||
|
||||
## Render System
|
||||
|
||||
### `RenderEngine`
|
||||
|
||||
`RenderEngine` owns normal runtime OpenGL work.
|
||||
|
||||
It starts a dedicated render thread and binds the GL context on that thread. Runtime GL work enters through render-thread requests or render command queues.
|
||||
|
||||
The render thread handles:
|
||||
|
||||
- output frame rendering
|
||||
- input frame upload
|
||||
- preview present
|
||||
- screenshot capture
|
||||
- render-local resets
|
||||
- shader/rebuild application
|
||||
- temporal history and shader feedback resources
|
||||
|
||||
Startup initialization still happens before the render thread starts while the app explicitly owns the context. Normal runtime work is routed through `RenderEngine`.
|
||||
|
||||
### Current Render-Thread Limitation
|
||||
|
||||
The current render thread is a shared GL executor, not a pure output-only cadence thread.
|
||||
|
||||
This means output render can still be delayed by:
|
||||
|
||||
- input upload work
|
||||
- preview present requests
|
||||
- screenshot capture
|
||||
- render reset commands
|
||||
- shader/resource update work
|
||||
- synchronous render-thread request queue wait
|
||||
|
||||
For output-timing diagnosis, input capture can be disabled with:
|
||||
|
||||
```text
|
||||
VST_DISABLE_INPUT_CAPTURE=1
|
||||
```
|
||||
|
||||
When enabled, the backend skips DeckLink input configuration/start and `HasInputSource()` reports false.
|
||||
|
||||
### `OpenGLRenderPipeline`
|
||||
|
||||
`OpenGLRenderPipeline` draws the frame and performs output packing/readback.
|
||||
|
||||
The current output path:
|
||||
|
||||
1. binds the composite framebuffer
|
||||
2. calls the render effect callback
|
||||
3. blits/composes into the output framebuffer
|
||||
4. packs the output for the configured pixel format
|
||||
5. flushes GL
|
||||
6. reads output into the provided system-memory output frame
|
||||
7. records render/readback timing
|
||||
|
||||
For BGRA8 output, the pipeline uses a BGRA-compatible pack framebuffer and async PBO readback by default.
|
||||
|
||||
## Video Backend
|
||||
|
||||
### `VideoBackend`
|
||||
|
||||
`VideoBackend` owns app-level video device lifecycle, output production, system-memory frame slots, and backend playout health.
|
||||
|
||||
It owns:
|
||||
|
||||
- backend lifecycle state
|
||||
- output production worker
|
||||
- output completion worker
|
||||
- system-memory output frame pool
|
||||
- ready/completed output queue
|
||||
- render cadence controller
|
||||
- playout policy
|
||||
- output frame scheduling into `VideoIODevice`
|
||||
- backend timing and queue telemetry
|
||||
|
||||
It does not own GL drawing. It asks `OpenGLVideoIOBridge` / `RenderEngine` to render into system-memory output frames.
|
||||
|
||||
### Lifecycle
|
||||
|
||||
The current backend lifecycle includes:
|
||||
|
||||
- discovery
|
||||
- configuring
|
||||
- configured
|
||||
- prerolling
|
||||
- running
|
||||
- degraded
|
||||
- stopping
|
||||
- stopped
|
||||
- failed
|
||||
|
||||
Startup now separates output schedule preparation from scheduled playback:
|
||||
|
||||
1. prepare the DeckLink output schedule
|
||||
2. start output completion worker
|
||||
3. start output producer worker
|
||||
4. warm up rendered system-memory preroll frames
|
||||
5. optionally start input streams
|
||||
6. start DeckLink scheduled playback
|
||||
|
||||
### Output Production
|
||||
|
||||
The output producer is cadence-driven.
|
||||
|
||||
`RenderCadenceController` tracks the selected output frame duration and decides when the producer should render another frame.
|
||||
|
||||
The render producer attempts to render one output frame per selected output tick. It does not speed up just because DeckLink is empty.
|
||||
|
||||
If render/GPU work is late enough, the cadence controller can skip late ticks according to policy.
|
||||
|
||||
### System-Memory Frame Pool
|
||||
|
||||
`SystemOutputFramePool` owns reusable system-memory output slots.
|
||||
|
||||
Slots have four states:
|
||||
|
||||
- `Free`
|
||||
- `Rendering`
|
||||
- `Completed`
|
||||
- `Scheduled`
|
||||
|
||||
Completed-but-unscheduled frames are treated as a latest-N cache. If render cadence needs space and old completed frames have not been scheduled, the oldest unscheduled completed frame can be recycled.
|
||||
|
||||
Scheduled frames are protected until DeckLink reports completion.
|
||||
|
||||
### Output Queue
|
||||
|
||||
`RenderOutputQueue` holds completed unscheduled output frames waiting to be scheduled.
|
||||
|
||||
It is bounded and latest-N:
|
||||
|
||||
- pushing beyond capacity releases/drops the oldest ready frame
|
||||
- `DropOldestFrame()` is used when the frame pool needs to recycle old completed work
|
||||
|
||||
### Scheduling
|
||||
|
||||
`VideoBackend::ScheduleReadyOutputFramesToTarget()` schedules completed system-memory frames up to the configured preroll/scheduled target.
|
||||
|
||||
DeckLink scheduling is capped by the current app-owned scheduled count. Real DeckLink buffered-frame telemetry is also recorded.
|
||||
|
||||
### Completion Handling
|
||||
|
||||
DeckLink completion callbacks do not render.
|
||||
|
||||
The callback path reports completion into `VideoBackend`, which processes completions on a backend worker. Completion processing:
|
||||
|
||||
- releases the system-memory slot by buffer pointer
|
||||
- records pacing
|
||||
- accounts for late/drop/flushed/completed result
|
||||
- records telemetry
|
||||
- wakes the output producer
|
||||
|
||||
## DeckLink Integration
|
||||
|
||||
### `DeckLinkSession`
|
||||
|
||||
`DeckLinkSession` is the DeckLink implementation of `VideoIODevice`.
|
||||
|
||||
It owns:
|
||||
|
||||
- DeckLink discovery
|
||||
- input/output mode selection
|
||||
- DeckLink input/output interfaces
|
||||
- keyer configuration
|
||||
- capture and playout delegates
|
||||
- schedule-time generation through `VideoPlayoutScheduler`
|
||||
- DeckLink frame scheduling
|
||||
- actual buffered-frame telemetry
|
||||
|
||||
For output, system-memory frames are scheduled through DeckLink `CreateVideoFrameWithBuffer()`.
|
||||
|
||||
When a system-memory frame is scheduled, `DeckLinkSession` records a map from the DeckLink frame object back to the app-owned system-memory buffer pointer. On completion, the buffer pointer is returned so `VideoBackend` can release the matching slot.
|
||||
|
||||
### Actual DeckLink Buffer Telemetry
|
||||
|
||||
`DeckLinkSession` calls `GetBufferedVideoFrameCount()` after schedule/completion where available.
|
||||
|
||||
Telemetry separates:
|
||||
|
||||
- actual DeckLink buffered frames
|
||||
- app-owned scheduled system-memory slots
|
||||
- synthetic schedule/completion counters
|
||||
- late/drop/flushed completion results
|
||||
|
||||
## Output Timing Experiments And Current Finding
|
||||
|
||||
The repo includes `DeckLinkRenderCadenceProbe`, a small standalone test app under:
|
||||
|
||||
```text
|
||||
apps/DeckLinkRenderCadenceProbe
|
||||
```
|
||||
|
||||
The probe does not use the main runtime, shader system, preview path, input upload path, or shared render engine. It uses:
|
||||
|
||||
- one OpenGL render thread with its own hidden GL context
|
||||
- simple BGRA8 motion rendering
|
||||
- async PBO readback
|
||||
- latest-N system-memory frame slots
|
||||
- a playout thread that feeds DeckLink
|
||||
- real rendered warmup before scheduled playback
|
||||
|
||||
The first hardware result was smooth at roughly 59.94/60 fps with:
|
||||
|
||||
- `renderFps` near 59.9
|
||||
- `scheduleFps` near 59.9
|
||||
- DeckLink actual buffered frames stable at 4
|
||||
- no late frames
|
||||
- no dropped frames
|
||||
- no PBO misses
|
||||
- no completed-frame drops
|
||||
|
||||
That proves the clean architecture can work on the test machine. Remaining main-app timing issues are therefore likely integration/ownership issues in the main app rather than a fundamental DeckLink/OpenGL/BGRA8 limitation.
|
||||
|
||||
The highest-value current suspects are:
|
||||
|
||||
- input upload sharing the output render thread
|
||||
- shared render-thread task queue contention
|
||||
- preview/screenshot work
|
||||
- runtime/render-state work on the output path
|
||||
|
||||
## Health Telemetry
|
||||
|
||||
`HealthTelemetry` owns app-visible health and timing observations.
|
||||
|
||||
It records:
|
||||
|
||||
- signal/input status
|
||||
- performance/render timing
|
||||
- event queue timing
|
||||
- backend lifecycle/playout state
|
||||
- output render queue wait
|
||||
- output render/readback timing
|
||||
- system-memory frame counts
|
||||
- actual DeckLink buffer depth
|
||||
- late/drop/flushed/completed frame counters
|
||||
- schedule-call timing/failure counts
|
||||
|
||||
Several hot-path telemetry calls use try-lock variants so observation does not become a major timing dependency.
|
||||
|
||||
Runtime state presentation exposes telemetry through the runtime JSON/open API surface.
|
||||
|
||||
## Preview And Screenshot
|
||||
|
||||
Preview is best-effort.
|
||||
|
||||
`OpenGLComposite::paintGL()` skips preview when the backend reports output pressure. Preview presentation is requested through the render thread.
|
||||
|
||||
Screenshot capture is also a render-thread request. It reads pixels from the output framebuffer and writes PNG asynchronously after capture.
|
||||
|
||||
Both preview and screenshot share GL execution with output render, so they are secondary to output timing.
|
||||
|
||||
## Output Readback Modes
|
||||
|
||||
The output readback path supports environment-selected modes:
|
||||
|
||||
```text
|
||||
VST_OUTPUT_READBACK_MODE=async_pbo
|
||||
VST_OUTPUT_READBACK_MODE=sync
|
||||
VST_OUTPUT_READBACK_MODE=cached_only
|
||||
```
|
||||
|
||||
Default behavior is `async_pbo`.
|
||||
|
||||
Experiment findings:
|
||||
|
||||
- direct synchronous readback was slower on the sampled machine
|
||||
- cached-only recovered timing but is visually invalid for live motion
|
||||
- BGRA8 pack framebuffer plus async PBO removed the earlier large readback stall
|
||||
|
||||
## Current Debug/Experiment Launches
|
||||
|
||||
VS Code launch configurations include:
|
||||
|
||||
- `Debug LoopThroughWithOpenGLCompositing`
|
||||
- `Debug LoopThroughWithOpenGLCompositing - sync readback experiment`
|
||||
- `Debug LoopThroughWithOpenGLCompositing - cached output experiment`
|
||||
- `Debug DeckLinkRenderCadenceProbe`
|
||||
|
||||
The default main-app debug launch currently disables input capture with `VST_DISABLE_INPUT_CAPTURE=1` so output timing can be tested without input upload interference.
|
||||
|
||||
## Current Ownership Summary
|
||||
|
||||
| Area | Current Owner |
|
||||
| --- | --- |
|
||||
| Durable runtime config/state | `RuntimeStore` |
|
||||
| Current committed live layer state | `CommittedLiveState` |
|
||||
| Mutation validation/policy | `RuntimeCoordinator` |
|
||||
| Render snapshot publication | `RuntimeSnapshotProvider` |
|
||||
| OSC/control ingress | `RuntimeServices` / `ControlServices` |
|
||||
| Internal event dispatch | `RuntimeEventDispatcher` |
|
||||
| Background persistence writes | `PersistenceWriter` |
|
||||
| GL context and normal GL work | `RenderEngine` render thread |
|
||||
| Render-pass execution and output readback | `OpenGLRenderPipeline` |
|
||||
| Device lifecycle and output production | `VideoBackend` |
|
||||
| DeckLink API integration | `DeckLinkSession` |
|
||||
| Operational health/timing | `HealthTelemetry` |
|
||||
|
||||
## Current Runtime Flow Summary
|
||||
|
||||
### Control Mutation
|
||||
|
||||
```text
|
||||
OSC/API/control input
|
||||
-> RuntimeServices / ControlServices
|
||||
-> RuntimeCoordinator
|
||||
-> RuntimeStore / CommittedLiveState / RuntimeLiveState
|
||||
-> RuntimeSnapshotProvider publication or live overlay update
|
||||
-> RuntimeEventDispatcher observations
|
||||
```
|
||||
|
||||
### Output Render
|
||||
|
||||
```text
|
||||
VideoBackend output producer
|
||||
-> RenderCadenceController tick
|
||||
-> SystemOutputFramePool acquire rendering slot
|
||||
-> OpenGLVideoIOBridge::RenderScheduledFrame
|
||||
-> RenderEngine::RequestOutputFrame
|
||||
-> render thread
|
||||
-> OpenGLRenderPipeline::RenderFrame
|
||||
-> system-memory output slot
|
||||
-> RenderOutputQueue completed frame
|
||||
```
|
||||
|
||||
### DeckLink Playout
|
||||
|
||||
```text
|
||||
RenderOutputQueue completed frame
|
||||
-> VideoBackend schedules to target
|
||||
-> DeckLinkSession::ScheduleOutputFrame
|
||||
-> CreateVideoFrameWithBuffer
|
||||
-> ScheduleVideoFrame
|
||||
-> DeckLink playback
|
||||
-> completion callback
|
||||
-> VideoBackend completion worker
|
||||
-> release scheduled system-memory slot
|
||||
```
|
||||
|
||||
### Input Capture
|
||||
|
||||
When input capture is enabled:
|
||||
|
||||
```text
|
||||
DeckLink input callback
|
||||
-> VideoBackend::HandleInputFrame
|
||||
-> OpenGLVideoIOBridge::UploadInputFrame
|
||||
-> RenderEngine::QueueInputFrame
|
||||
-> render thread upload
|
||||
```
|
||||
|
||||
When `VST_DISABLE_INPUT_CAPTURE=1`, this flow is skipped.
|
||||
|
||||
## Known Current Constraints
|
||||
|
||||
- The main app render thread still handles multiple kinds of GL work.
|
||||
- Output render still uses a synchronous request/response call into the render thread.
|
||||
- Input upload can contend with output render when input capture is enabled.
|
||||
- Preview and screenshot share the render thread.
|
||||
- Phase/experiment documents still exist as historical notes, but this document is the current architecture summary.
|
||||
|
||||
## Practical Rules
|
||||
|
||||
- Keep one owner for each kind of state.
|
||||
- Keep GL work on the render thread.
|
||||
- Keep DeckLink completion callbacks passive.
|
||||
- Treat completed unscheduled output frames as latest-N cache entries.
|
||||
- Protect scheduled output frames until DeckLink completion.
|
||||
- Keep output timing more important than preview/screenshot.
|
||||
- Measure timing by domain instead of adding fallback branches blindly.
|
||||
377
docs/DECKLINK_OPENGL_LESSONS_LEARNED.md
Normal file
377
docs/DECKLINK_OPENGL_LESSONS_LEARNED.md
Normal file
@@ -0,0 +1,377 @@
|
||||
# DeckLink / OpenGL Lessons Learned
|
||||
|
||||
This document summarizes the practical lessons from the Phase 3 through Phase 7.7 refactor work, especially the DeckLink playout timing experiments.
|
||||
|
||||
It is intentionally broader than the phase design docs. The goal is to preserve what we now know about the system so future architecture choices start from evidence instead of rediscovering the same constraints.
|
||||
|
||||
## High-Level Lesson
|
||||
|
||||
The application is not just a renderer with a video output attached.
|
||||
|
||||
It is a real-time playout system with several independent clocks:
|
||||
|
||||
- the selected output cadence, for example 59.94 fps
|
||||
- the GPU render/readback timeline
|
||||
- the DeckLink scheduled playback clock
|
||||
- the Windows thread scheduler
|
||||
- the input capture callback cadence
|
||||
- the preview/window message loop
|
||||
- the runtime/control update cadence
|
||||
|
||||
Stable playback depends on assigning one owner to each timing domain and keeping those domains loosely coupled.
|
||||
|
||||
## What Worked
|
||||
|
||||
### Named State Contracts Helped
|
||||
|
||||
`RenderFrameInput` and `RenderFrameState` made the render path easier to reason about.
|
||||
|
||||
Before that, frame rendering depended on scattered choices about snapshots, cache state, layer state, input source state, and runtime service state. Naming the frame contract made it possible to move logic out of `RenderEngine` and toward explicit frame construction.
|
||||
|
||||
Lesson:
|
||||
|
||||
- keep frame inputs explicit
|
||||
- keep render-frame state immutable for the duration of a frame
|
||||
- avoid making the renderer ask global systems which state it should use mid-frame
|
||||
|
||||
### Render-Thread Ownership Helped
|
||||
|
||||
Moving GL work behind a render-thread boundary reduced wrong-thread GL access risk and made ownership clearer.
|
||||
|
||||
The current render thread is still shared by output render, input upload, preview, screenshot, resize, and reset work, so it is not yet a pure output cadence thread. But the ownership direction is right.
|
||||
|
||||
Lesson:
|
||||
|
||||
- GL context ownership should be explicit
|
||||
- public methods should enqueue or request work
|
||||
- render-thread methods should own GL bodies
|
||||
- synchronous calls should be reserved for places that genuinely need a result
|
||||
|
||||
### Background Persistence Was Worth It
|
||||
|
||||
Moving persistence away from hot render/control paths reduced incidental latency risk and made state writes easier to reason about.
|
||||
|
||||
Lesson:
|
||||
|
||||
- runtime/control persistence should not sit on output render timing
|
||||
- shutdown flushing is fine, steady-state blocking is not
|
||||
|
||||
### Lifecycle State Was Worth It
|
||||
|
||||
The backend lifecycle model gave us better failure and shutdown vocabulary.
|
||||
|
||||
This became important once startup stopped being a single `Start()` call and became:
|
||||
|
||||
- prepare output schedule
|
||||
- start render cadence
|
||||
- warm up real frames
|
||||
- start input streams
|
||||
- start scheduled playback
|
||||
|
||||
Lesson:
|
||||
|
||||
- playout startup needs phases
|
||||
- degradation should be explicit
|
||||
- shutdown order should be deliberate and testable
|
||||
|
||||
## What Did Not Work
|
||||
|
||||
### Completion-Driven Rendering Was Too Fragile
|
||||
|
||||
Rendering on or near DeckLink completion can average the target frame rate, but it leaves no headroom.
|
||||
|
||||
When the callback asks for a frame just-in-time, any small delay in render, readback, scheduling, or Windows wake timing becomes visible as a buffer dip or stutter.
|
||||
|
||||
Lesson:
|
||||
|
||||
- DeckLink completion should release scheduled resources and wake scheduling
|
||||
- it should not render
|
||||
- it should not decide visual fallback policy in steady state
|
||||
|
||||
### Black Fallback Hid The Real Timing Problem
|
||||
|
||||
Scheduling black on app-ready underrun made the pipeline appear to keep moving while producing visible black flicker.
|
||||
|
||||
It also made diagnosis harder because DeckLink could have scheduled frames while the app visibly failed.
|
||||
|
||||
Lesson:
|
||||
|
||||
- black is a startup/error/degraded-state policy, not normal steady-state recovery
|
||||
- steady-state underruns should be measured as timing failures
|
||||
|
||||
### Synthetic Schedule Lead Was Misleading
|
||||
|
||||
The synthetic scheduled/completed index could report a large buffer while DeckLink still showed low actual device buffer depth.
|
||||
|
||||
Real DeckLink `GetBufferedVideoFrameCount()` telemetry was necessary to separate:
|
||||
|
||||
- app-owned scheduled slots
|
||||
- synthetic schedule lead
|
||||
- actual hardware/device buffer depth
|
||||
|
||||
Lesson:
|
||||
|
||||
- measure actual device buffer depth
|
||||
- keep synthetic counters only as diagnostics
|
||||
- do not infer device health from internal stream indexes alone
|
||||
|
||||
### More Buffer Is Not Automatically Smoother
|
||||
|
||||
Increasing DeckLink scheduled frames sometimes made the reported device buffer look healthier while visible motion still stuttered.
|
||||
|
||||
The problem was not only "how many frames are scheduled"; it was also whether the scheduled frames represented a stable render cadence.
|
||||
|
||||
Lesson:
|
||||
|
||||
- buffer depth absorbs jitter, but it cannot fix bad cadence ownership
|
||||
- a full buffer of poorly timed or repeated frames can still look wrong
|
||||
|
||||
### Speed-Up Catch-Up Was The Wrong Instinct
|
||||
|
||||
Letting the producer sprint to refill the buffer created new timing artifacts.
|
||||
|
||||
The render side should behave like a stable game/render loop: render at the selected cadence, record lateness, and only skip ticks when render/GPU work itself overruns.
|
||||
|
||||
Lesson:
|
||||
|
||||
- the render thread should not render faster because DeckLink is empty
|
||||
- buffer drain is a failure signal, not a sprint signal
|
||||
- warmup should fill buffers before playback starts
|
||||
|
||||
## GPU Readback Lessons
|
||||
|
||||
### The Original Readback Path Was The Major Collapse
|
||||
|
||||
Early Phase 7.5 telemetry showed `glReadPixels(..., nullptr)` into the PBO costing roughly 8-14 ms on representative samples. That was enough to collapse ready depth and cause long freezes.
|
||||
|
||||
Direct synchronous readback was worse on the sampled machine.
|
||||
|
||||
Cached-output mode, while visually invalid for live output, immediately recovered timing. That proved ongoing GPU-to-CPU transfer was the major cost in that version of the path.
|
||||
|
||||
Lesson:
|
||||
|
||||
- isolate readback cost from render cost
|
||||
- use intentionally invalid cached-output experiments when diagnosing throughput
|
||||
- do not assume async PBO is actually cheap on every format/driver path
|
||||
|
||||
### BGRA8 Packing Changed The Problem
|
||||
|
||||
Changing the output path so readback matched the DeckLink BGRA8 format made `asyncQueueReadPixelsMs` drop dramatically in sampled runs.
|
||||
|
||||
Long pauses disappeared and the remaining issue became short stutters/cadence gaps.
|
||||
|
||||
Lesson:
|
||||
|
||||
- output/readback format matters
|
||||
- avoid format conversions on the readback path when possible
|
||||
- BGRA8 is a good current format target for experiments
|
||||
- v210/YUV packing can be deferred until cadence is stable
|
||||
|
||||
### DeckLink SDK Fast Transfer Was Not Available On The Test GPU
|
||||
|
||||
The SDK OpenGL fast-transfer path depends on hardware/extension support that was not present on the RTX 4060 Ti test machine:
|
||||
|
||||
- NVIDIA DVP path was gated around Quadro-style support
|
||||
- `GL_AMD_pinned_memory` was not exposed
|
||||
|
||||
Lesson:
|
||||
|
||||
- SDK fast-transfer samples are useful references but not a universal fix
|
||||
- unsupported fast-transfer code should not be central to the architecture
|
||||
- the default path must work with ordinary consumer GPUs
|
||||
|
||||
## DeckLink Lessons
|
||||
|
||||
### DeckLink Wants Scheduled System-Memory Frames
|
||||
|
||||
Using `CreateVideoFrameWithBuffer()` lets DeckLink schedule frames backed by our system-memory slots.
|
||||
|
||||
That is the right ownership model for this app:
|
||||
|
||||
- render/readback writes into a slot
|
||||
- DeckLink schedules a frame that references that slot
|
||||
- the slot is protected until DeckLink completion
|
||||
|
||||
Lesson:
|
||||
|
||||
- system-memory slots are the contract between render and playout
|
||||
- scheduled slots must not be recycled early
|
||||
- completed-but-unscheduled slots can be latest-N cache entries
|
||||
|
||||
### Startup Needs Real Preroll
|
||||
|
||||
Starting scheduled playback before real rendered frames exist creates avoidable startup fragility.
|
||||
|
||||
The better startup shape is:
|
||||
|
||||
- prepare the DeckLink schedule
|
||||
- start render cadence
|
||||
- render warmup frames at normal cadence
|
||||
- schedule those frames as preroll
|
||||
- start DeckLink scheduled playback
|
||||
|
||||
Lesson:
|
||||
|
||||
- do not use black preroll as the normal startup path
|
||||
- do not render faster during warmup
|
||||
- if warmup cannot fill in a bounded time, fail/degrade visibly
|
||||
|
||||
## Buffering Lessons
|
||||
|
||||
### There Are Two Different Buffers
|
||||
|
||||
The app has at least two important frame stores:
|
||||
|
||||
- system-memory completed/latest-N frames
|
||||
- DeckLink scheduled/device buffer
|
||||
|
||||
They have different ownership rules.
|
||||
|
||||
Completed-but-unscheduled frames are disposable if a newer frame is available and cadence needs the slot.
|
||||
|
||||
Scheduled frames are not disposable because DeckLink may still read them.
|
||||
|
||||
Lesson:
|
||||
|
||||
- latest-N completed frames are a cache
|
||||
- scheduled frames are owned by DeckLink until completion
|
||||
- keep metrics for both
|
||||
|
||||
### Consume-Before-Render Is The Wrong Model For Completed Frames
|
||||
|
||||
If the render cadence waits for completed frames to be consumed, DeckLink timing can indirectly slow the renderer.
|
||||
|
||||
That couples the clocks again.
|
||||
|
||||
Lesson:
|
||||
|
||||
- render cadence should keep rendering at selected cadence
|
||||
- if completed cache is full, recycle/drop the oldest unscheduled completed frame
|
||||
- only scheduled/in-flight saturation should prevent rendering to a safe slot
|
||||
|
||||
## Render Thread Lessons
|
||||
|
||||
### The Current Render Thread Is Still Shared
|
||||
|
||||
The GL render thread currently handles:
|
||||
|
||||
- output rendering
|
||||
- input upload
|
||||
- preview present
|
||||
- screenshot capture
|
||||
- render reset commands
|
||||
- shader/resource operations
|
||||
|
||||
Output render can therefore be delayed by queued or inline work.
|
||||
|
||||
Lesson:
|
||||
|
||||
- "one GL thread" is not the same as "one output cadence thread"
|
||||
- output render should become the highest-priority GL operation
|
||||
- non-output GL work needs budgets, coalescing, or deferral
|
||||
|
||||
### Input Upload Is A Suspect Timing Coupling
|
||||
|
||||
Output render currently processes input upload work immediately before rendering the output frame.
|
||||
|
||||
That keeps input fresh but can steal time from the exact frame we are trying to render on cadence.
|
||||
|
||||
Lesson:
|
||||
|
||||
- measure input upload count and time immediately before output render
|
||||
- test policies such as `one_before_output` or `skip_before_output`
|
||||
- prefer latest-input semantics over draining every pending upload
|
||||
|
||||
### Preview And Screenshot Must Stay Secondary
|
||||
|
||||
Preview is useful, but DeckLink output is the real-time path.
|
||||
|
||||
Screenshot and preview share GL resources and can block or queue work on the same render thread.
|
||||
|
||||
Lesson:
|
||||
|
||||
- preview should be skipped when output is under pressure
|
||||
- screenshot capture should be treated as disruptive unless proven otherwise
|
||||
- forced preview/screenshot should be visible in telemetry
|
||||
|
||||
## Telemetry Lessons
|
||||
|
||||
The useful telemetry has been the telemetry that separates domains:
|
||||
|
||||
- output render queue wait
|
||||
- render/draw time
|
||||
- readback queue time
|
||||
- readback fence/map/copy time
|
||||
- app ready/completed queue depth
|
||||
- system-memory free/rendering/completed/scheduled counts
|
||||
- actual DeckLink buffered-frame count
|
||||
- DeckLink schedule-call time/failures
|
||||
- late/drop completion counts
|
||||
|
||||
Lesson:
|
||||
|
||||
- averages are not enough
|
||||
- timing spikes matter more than steady low values
|
||||
- count ownership states, not just queue depth
|
||||
- keep experiment logs short and evidence-based
|
||||
|
||||
## Current Architectural Direction
|
||||
|
||||
The current direction is still sound:
|
||||
|
||||
```text
|
||||
Render cadence loop
|
||||
renders at selected output cadence
|
||||
writes latest-N completed system-memory frames
|
||||
never sprints to refill DeckLink
|
||||
|
||||
Frame store
|
||||
owns free / rendering / completed / scheduled slots
|
||||
recycles unscheduled completed frames when needed
|
||||
protects scheduled frames until completion
|
||||
|
||||
DeckLink playout scheduler
|
||||
consumes completed frames
|
||||
tops up actual device buffer
|
||||
never renders
|
||||
|
||||
Completion callback
|
||||
releases scheduled slots
|
||||
records completion result
|
||||
wakes scheduler
|
||||
```
|
||||
|
||||
## Rewrite Lesson
|
||||
|
||||
A full restart is not obviously the right next move.
|
||||
|
||||
The current repo now contains:
|
||||
|
||||
- working runtime/control architecture
|
||||
- useful phase docs
|
||||
- non-GL tests around key state machines
|
||||
- real telemetry
|
||||
- a clearer understanding of DeckLink and OpenGL timing
|
||||
|
||||
The better next step is likely a contained "V2 spine" inside the current app:
|
||||
|
||||
- harden the render cadence loop
|
||||
- harden the frame store
|
||||
- separate DeckLink scheduling
|
||||
- demote preview/screenshot/input upload below output cadence
|
||||
- delete old compatibility branches as they become unnecessary
|
||||
|
||||
A full rewrite becomes attractive only if the current GL ownership model cannot be made deterministic without excessive surgery, or if the project switches rendering API.
|
||||
|
||||
## Practical Rules Going Forward
|
||||
|
||||
- One timing authority per domain.
|
||||
- Render cadence is time-driven, not completion-driven.
|
||||
- DeckLink scheduling is device-buffer-driven, not render-driven.
|
||||
- Completion callbacks release and report; they do not render.
|
||||
- System-memory completed frames are latest-N cache entries.
|
||||
- Scheduled frames are protected until DeckLink completion.
|
||||
- Startup uses real rendered warmup/preroll.
|
||||
- Black fallback is degraded/error behavior, not steady-state behavior.
|
||||
- Output render has priority over preview, screenshot, and bulk input upload.
|
||||
- Measure before adding recovery branches.
|
||||
@@ -1,715 +0,0 @@
|
||||
# Phase 1 Design: Subsystem Boundaries and Target Architecture
|
||||
|
||||
This document expands Phase 1 of [ARCHITECTURE_RESILIENCE_REVIEW.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/ARCHITECTURE_RESILIENCE_REVIEW.md) into a concrete target design. Its purpose is to define the long-term subsystem split before later phases introduce a full event model and move rendering onto a sole-owner render thread.
|
||||
|
||||
The main goal of Phase 1 is not to immediately rewrite the app. It is to establish clear ownership boundaries so later refactors all move toward the same architecture instead of solving local problems in conflicting ways.
|
||||
|
||||
## Status
|
||||
|
||||
Phase 1 has two different meanings in this repo, and they should not be collapsed:
|
||||
|
||||
- Phase 1 design package: complete.
|
||||
- Phase 1 runtime implementation foothold: complete.
|
||||
|
||||
The completed design package includes the agreed subsystem names, responsibilities, dependency rules, state categories, and current-to-target migration map. The runtime code now has concrete subsystem folders, collaborators, read models, and tests for those boundaries, and the compiled runtime path no longer depends on `RuntimeHost`. That is different from saying every target boundary is fully extracted across the whole app: later roadmap phases are still responsible for the event model, sole-owner render thread, explicit live-state layering, background persistence, backend state machine, and fuller telemetry.
|
||||
|
||||
## Why Phase 1 Exists
|
||||
|
||||
At the start of this phase the app worked, but too many responsibilities converged in a few places:
|
||||
|
||||
- `RuntimeHost` owned persistence, live layer state, shader package access, status reporting, and mutation entrypoints.
|
||||
- `OpenGLComposite` coordinates runtime setup, render state retrieval, shader rebuild handling, transient OSC overlay behavior, and video backend integration.
|
||||
- DeckLink callback-driven playout still reaches directly into render-facing work.
|
||||
- Background services rely on polling and shared mutable state more than explicit subsystem contracts.
|
||||
|
||||
Those are exactly the kinds of overlaps that make timing issues, state regressions, and recovery edge cases harder to solve cleanly.
|
||||
|
||||
Phase 1 creates a map for where each responsibility should eventually live.
|
||||
|
||||
## Design Goals
|
||||
|
||||
The target architecture should optimize for:
|
||||
|
||||
- live timing isolation
|
||||
- explicit state ownership
|
||||
- predictable recovery behavior
|
||||
- clear boundaries between persistent state and transient live state
|
||||
- easier testing of non-GL and non-hardware logic
|
||||
- fewer cross-thread shared mutable objects
|
||||
- a playout model that can evolve toward producer/consumer scheduling
|
||||
|
||||
## Non-Goals
|
||||
|
||||
Phase 1 does not itself require:
|
||||
|
||||
- replacing every direct call with events immediately
|
||||
- moving all rendering to a new thread yet
|
||||
- redesigning the shader contract again
|
||||
- changing DeckLink behavior in place
|
||||
- removing all existing classes before replacements exist
|
||||
|
||||
This phase is the target design and the dependency rules. Later phases perform the actual extraction.
|
||||
|
||||
## Current Pressure Points
|
||||
|
||||
The following current code paths are the strongest evidence for the split proposed here:
|
||||
|
||||
- `RuntimeHost` was both store and live authority:
|
||||
- `RuntimeHost.h`
|
||||
- `RuntimeHost.cpp`
|
||||
- `OpenGLComposite` is both app orchestrator and render/runtime coordinator:
|
||||
- [OpenGLComposite.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/composite/OpenGLComposite.cpp:106)
|
||||
- [OpenGLComposite.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/composite/OpenGLComposite.cpp:283)
|
||||
- `RuntimeServices` mixes service orchestration with polling and deferred state work:
|
||||
- [RuntimeServices.h](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/control/RuntimeServices.h:46)
|
||||
- [RuntimeServices.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/control/RuntimeServices.cpp:194)
|
||||
- Playout is still callback-coupled to render-facing work:
|
||||
- [OpenGLVideoIOBridge.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/pipeline/OpenGLVideoIOBridge.cpp:68)
|
||||
|
||||
## Target Subsystems
|
||||
|
||||
The long-term architecture should converge on seven primary subsystems:
|
||||
|
||||
1. `RuntimeStore`
|
||||
2. `RuntimeCoordinator`
|
||||
3. `RuntimeSnapshotProvider`
|
||||
4. `ControlServices`
|
||||
5. `RenderEngine`
|
||||
6. `VideoBackend`
|
||||
7. `HealthTelemetry`
|
||||
|
||||
The split below is intentionally sharper than the structure of the current code. The point is to make ownership obvious.
|
||||
|
||||
Subsystem-specific design notes that elaborate these boundaries live under [docs/subsystems](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems).
|
||||
|
||||
## Phase 1 Document Set
|
||||
|
||||
This document is the parent note for the Phase 1 subsystem package. The bundle index and subsystem notes live here:
|
||||
|
||||
- [Subsystem Design Index](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/README.md)
|
||||
- [RuntimeStore.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/RuntimeStore.md)
|
||||
- [RuntimeCoordinator.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/RuntimeCoordinator.md)
|
||||
- [RuntimeSnapshotProvider.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/RuntimeSnapshotProvider.md)
|
||||
- [ControlServices.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/ControlServices.md)
|
||||
- [RenderEngine.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/RenderEngine.md)
|
||||
- [VideoBackend.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/VideoBackend.md)
|
||||
- [HealthTelemetry.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/HealthTelemetry.md)
|
||||
|
||||
## Current Implementation Foothold
|
||||
|
||||
The codebase now has a Phase 1 runtime implementation foothold in place:
|
||||
|
||||
- `RuntimeStore`
|
||||
- [RuntimeStore.h](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/runtime/store/RuntimeStore.h)
|
||||
- [RuntimeStore.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/runtime/store/RuntimeStore.cpp)
|
||||
- `RuntimeConfigStore`
|
||||
- [RuntimeConfigStore.h](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/runtime/store/RuntimeConfigStore.h)
|
||||
- [RuntimeConfigStore.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/runtime/store/RuntimeConfigStore.cpp)
|
||||
- `ShaderPackageCatalog`
|
||||
- [ShaderPackageCatalog.h](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/runtime/store/ShaderPackageCatalog.h)
|
||||
- [ShaderPackageCatalog.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/runtime/store/ShaderPackageCatalog.cpp)
|
||||
- `RuntimeCoordinator`
|
||||
- [RuntimeCoordinator.h](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/runtime/coordination/RuntimeCoordinator.h)
|
||||
- [RuntimeCoordinator.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/runtime/coordination/RuntimeCoordinator.cpp)
|
||||
- `RuntimeSnapshotProvider`
|
||||
- [RuntimeSnapshotProvider.h](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/runtime/snapshot/RuntimeSnapshotProvider.h)
|
||||
- [RuntimeSnapshotProvider.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/runtime/snapshot/RuntimeSnapshotProvider.cpp)
|
||||
- `RenderSnapshotBuilder`
|
||||
- [RenderSnapshotBuilder.h](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/runtime/snapshot/RenderSnapshotBuilder.h)
|
||||
- [RenderSnapshotBuilder.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/runtime/snapshot/RenderSnapshotBuilder.cpp)
|
||||
- `ControlServices`
|
||||
- [ControlServices.h](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/control/ControlServices.h)
|
||||
- [ControlServices.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/control/ControlServices.cpp)
|
||||
- `HealthTelemetry`
|
||||
- [HealthTelemetry.h](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/runtime/telemetry/HealthTelemetry.h)
|
||||
- [HealthTelemetry.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/runtime/telemetry/HealthTelemetry.cpp)
|
||||
- `RenderEngine`
|
||||
- [RenderEngine.h](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/RenderEngine.h)
|
||||
- [RenderEngine.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/RenderEngine.cpp)
|
||||
- `VideoBackend`
|
||||
- [VideoBackend.h](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/videoio/VideoBackend.h)
|
||||
- [VideoBackend.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/videoio/VideoBackend.cpp)
|
||||
|
||||
The runtime seams are now concrete code boundaries. Some app-level flows still delegate through compatibility helpers, `OpenGLComposite`, `DeckLinkSession`, and the existing bridge/pipeline classes, but runtime responsibilities have moved behind named collaborators:
|
||||
|
||||
- UI/runtime control calls in `OpenGLCompositeRuntimeControls.cpp` now route through `RuntimeCoordinator`
|
||||
- runtime startup now initializes path resolution and config loading through `RuntimeConfigStore`, with shader package scan and lookup delegated to `ShaderPackageCatalog`
|
||||
- runtime/UI state JSON composition now routes through `RuntimeStatePresenter` and `RuntimeStateJson` instead of living in `RuntimeHost` or `RuntimeStore`
|
||||
- regular stored layer mutations and stack preset save/load now route through `RuntimeStore` into `LayerStackStore` instead of `RuntimeHost` public APIs
|
||||
- persisted OSC-by-control-key commits now route through `RuntimeCoordinator` before applying store changes
|
||||
- mutation and reload policy now routes through `RuntimeCoordinator`
|
||||
- parameter target resolution, value normalization, trigger classification, and move no-op classification now live under `RuntimeCoordinator`
|
||||
- render-state and shader-build reads in `OpenGLComposite.cpp`, `OpenGLShaderPrograms.cpp`, and `ShaderBuildQueue.cpp` now route through `RuntimeSnapshotProvider`
|
||||
- `RuntimeSnapshotProvider` now depends on `RenderSnapshotBuilder` rather than on `RuntimeStore` friendship or shared `RuntimeHost` access
|
||||
- render-state assembly, cached parameter refresh, dynamic frame-field application, and render snapshot versions now live in `RenderSnapshotBuilder` instead of `RuntimeStore`
|
||||
- `RuntimeSnapshotProvider` now publishes versioned render snapshot objects and serves matching consumers from the last published snapshot
|
||||
- service ingress and polling coordination now route through `ControlServices`
|
||||
- `ControlServices` now queues coordinator results for OSC commit and file-poll outcomes instead of directly deciding runtime/store policy
|
||||
- timing and status writes now route through `HealthTelemetry`
|
||||
- `HealthTelemetry` now owns the live signal, video-I/O, and performance snapshots directly instead of `RuntimeHost` keeping those backing fields
|
||||
- render-side frame advancement and render-performance reporting now flow through `RuntimeSnapshotProvider` and `HealthTelemetry` instead of directly through `RuntimeHost`
|
||||
- `RuntimeStore` now owns its durable/session backing fields directly instead of wrapping a compatibility `RuntimeHost` object
|
||||
- `RuntimeConfigStore` now owns runtime config parsing, path resolution, configured ports/formats, runtime roots, and shader compiler paths instead of leaving those responsibilities inside `RuntimeStore`
|
||||
- `ShaderPackageCatalog` now owns shader package scanning, package status/order/lookup, and package asset/source change comparison instead of leaving those responsibilities inside `RuntimeStore`
|
||||
- `LayerStackStore` now owns durable layer state, layer CRUD/reorder, parameter persistence, and stack preset value serialization/load instead of leaving those responsibilities inside `RuntimeStore`
|
||||
- `RuntimeStatePresenter` and `RuntimeStateJson` now own runtime-state JSON assembly and layer-stack presentation serialization instead of leaving those responsibilities inside storage classes
|
||||
- `RuntimeCoordinator` now uses explicit `RuntimeStore` query APIs/read models instead of friendship or direct store-internal access
|
||||
- live OSC overlay state and smoothing/commit decisions now live under `RenderEngine` instead of `OpenGLComposite`
|
||||
- coordinator result application, shader-build requests, ready-build application, and runtime-state broadcasts now route through `RuntimeUpdateController` instead of being interpreted directly by `OpenGLComposite`
|
||||
- `OpenGLComposite` now owns a `RenderEngine` seam for renderer, pipeline, render-pass, and shader-program responsibilities
|
||||
- `OpenGLComposite` now owns a `VideoBackend` seam for device/session ownership and callback wiring
|
||||
- `OpenGLVideoIOBridge` now acts as an explicit compatibility adapter between `VideoBackend` and `RenderEngine`, instead of `OpenGLComposite` directly owning both sides
|
||||
- `RuntimeSubsystemTests` now cover the new runtime seams around layer-stack storage, preset round-trips, mutation classification, and runtime-state JSON serialization
|
||||
|
||||
That means Phase 2 can focus on eventing and coordination mechanics rather than inventing the runtime boundary vocabulary.
|
||||
|
||||
Later-phase extraction work includes:
|
||||
|
||||
- moving persistence to an asynchronous writer in a later phase
|
||||
- replacing polling/shared-object coordination with the planned internal event model
|
||||
- making the render thread the sole GL owner
|
||||
- formalizing committed-live versus transient-overlay layering
|
||||
- making backend lifecycle and telemetry richer and more explicit
|
||||
|
||||
## Subsystem Responsibilities
|
||||
|
||||
### `RuntimeStore`
|
||||
|
||||
`RuntimeStore` owns persisted and operator-authored state.
|
||||
|
||||
It is the source of truth for:
|
||||
|
||||
- runtime config loaded from disk
|
||||
- persisted layer stack structure
|
||||
- persisted parameter values
|
||||
- stack preset serialization/deserialization
|
||||
- shader/package metadata that must survive across renders
|
||||
|
||||
It should not be responsible for:
|
||||
|
||||
- render-thread timing
|
||||
- GL resource lifetime
|
||||
- live transient overlays
|
||||
- hardware callback coordination
|
||||
- UI/websocket broadcasting policy
|
||||
|
||||
Design rules:
|
||||
|
||||
- disk I/O belongs here or in its dedicated writer helper
|
||||
- values here are authoritative for saved state
|
||||
- writes may be debounced later, but the data model itself belongs here
|
||||
|
||||
### `RuntimeCoordinator`
|
||||
|
||||
`RuntimeCoordinator` is the mutation and policy layer.
|
||||
|
||||
It is responsible for:
|
||||
|
||||
- receiving valid mutation requests from controls, services, or automation
|
||||
- validating requested changes against shader definitions and config rules
|
||||
- resolving how persisted state, committed live state, and transient overlays should interact
|
||||
- requesting snapshot publication when state changes affect render
|
||||
- requesting persistence when stored state changes
|
||||
|
||||
It should not be responsible for:
|
||||
|
||||
- direct disk serialization details
|
||||
- direct GL work
|
||||
- hardware device lifecycle
|
||||
- polling loops
|
||||
|
||||
Design rules:
|
||||
|
||||
- all non-render mutations should eventually flow through this layer
|
||||
- this layer decides whether a change is persisted, transient, or both
|
||||
- this layer owns state policy, not device policy
|
||||
|
||||
### `RuntimeSnapshotProvider`
|
||||
|
||||
`RuntimeSnapshotProvider` publishes render-facing snapshots.
|
||||
|
||||
It is responsible for:
|
||||
|
||||
- building immutable or near-immutable render snapshots
|
||||
- translating runtime state into render-ready structures
|
||||
- publishing versioned snapshots
|
||||
- serving the render side without large mutable shared locks
|
||||
|
||||
It should not be responsible for:
|
||||
|
||||
- deciding whether a mutation is allowed
|
||||
- directly applying UI/OSC requests
|
||||
- persistence
|
||||
- shader compilation orchestration
|
||||
|
||||
Design rules:
|
||||
|
||||
- render consumes snapshots, not live mutable store objects
|
||||
- snapshots should be cheap to read and explicit about version changes
|
||||
- dynamic frame-only values may still be attached later, but the snapshot shape should stay stable
|
||||
|
||||
### `ControlServices`
|
||||
|
||||
`ControlServices` is the ingress boundary for non-render control sources.
|
||||
|
||||
It is responsible for:
|
||||
|
||||
- OSC receive and route resolution
|
||||
- REST/websocket/control UI ingress
|
||||
- file-watch or reload request ingress
|
||||
- translating external inputs into typed internal actions/events
|
||||
- low-cost buffering/coalescing where appropriate
|
||||
|
||||
It should not be responsible for:
|
||||
|
||||
- persistence decisions
|
||||
- render snapshot building
|
||||
- hardware playout policy
|
||||
- direct long-lived state ownership beyond ingress-specific queues
|
||||
|
||||
Design rules:
|
||||
|
||||
- external inputs enter here and are normalized before they touch core state
|
||||
- service-specific timing concerns stay here unless they affect whole-app policy
|
||||
- no service should directly mutate render-facing state structures
|
||||
|
||||
### `RenderEngine`
|
||||
|
||||
`RenderEngine` is the owner of live rendering behavior.
|
||||
|
||||
It is responsible for:
|
||||
|
||||
- sole ownership of GL work in the target architecture
|
||||
- shader program lifecycle once compilation outputs are available
|
||||
- texture upload scheduling
|
||||
- render-pass execution
|
||||
- temporal history and shader feedback resources
|
||||
- transient render-only overlays
|
||||
- preview production as a subordinate output
|
||||
- output-frame production for the video backend
|
||||
|
||||
It should not be responsible for:
|
||||
|
||||
- persistence
|
||||
- user-facing control normalization
|
||||
- hardware discovery/configuration
|
||||
- high-level runtime mutation policy
|
||||
|
||||
Design rules:
|
||||
|
||||
- render consumes snapshots plus render-local transient state
|
||||
- render-local state is allowed if it stays render-local
|
||||
- preview must be treated as best-effort relative to playout
|
||||
|
||||
### `VideoBackend`
|
||||
|
||||
`VideoBackend` owns input/output device lifecycle and playout policy.
|
||||
|
||||
It is responsible for:
|
||||
|
||||
- input device configuration and callbacks
|
||||
- output device configuration and callbacks
|
||||
- frame scheduling policy
|
||||
- buffer-pool ownership
|
||||
- playout headroom policy
|
||||
- input signal status
|
||||
- backend state transitions and recovery logic
|
||||
|
||||
It should not be responsible for:
|
||||
|
||||
- composing frames
|
||||
- owning GL contexts long-term
|
||||
- validating shader parameter changes
|
||||
- persistence
|
||||
|
||||
Design rules:
|
||||
|
||||
- this subsystem is the consumer of rendered output frames, not the owner of frame composition policy
|
||||
- it should evolve toward producer/consumer playout rather than callback-driven rendering
|
||||
- backend state should be explicit and reportable
|
||||
|
||||
### `HealthTelemetry`
|
||||
|
||||
`HealthTelemetry` owns structured operational visibility.
|
||||
|
||||
It is responsible for:
|
||||
|
||||
- logging
|
||||
- warning/error counters
|
||||
- timing traces
|
||||
- subsystem health state
|
||||
- degraded-mode reporting
|
||||
- operator-visible health summaries
|
||||
|
||||
It should not be responsible for:
|
||||
|
||||
- deciding core app behavior
|
||||
- owning render or backend state
|
||||
- persistence policy
|
||||
|
||||
Design rules:
|
||||
|
||||
- all major subsystems publish health information here
|
||||
- health visibility should outlive UI connection state
|
||||
- modal dialogs should not be the main operational surface
|
||||
|
||||
## Target Dependency Rules
|
||||
|
||||
The architecture should follow these rules as closely as possible.
|
||||
|
||||
Allowed dependency directions:
|
||||
|
||||
- `ControlServices -> RuntimeCoordinator`
|
||||
- `RuntimeCoordinator -> RuntimeStore`
|
||||
- `RuntimeCoordinator -> RuntimeSnapshotProvider`
|
||||
- `RuntimeCoordinator -> HealthTelemetry`
|
||||
- `RuntimeSnapshotProvider -> RenderSnapshotBuilder`
|
||||
- `RenderSnapshotBuilder -> RuntimeStore`
|
||||
- `RenderEngine -> RuntimeSnapshotProvider`
|
||||
- `RenderEngine -> HealthTelemetry`
|
||||
- `VideoBackend -> RenderEngine`
|
||||
- `VideoBackend -> HealthTelemetry`
|
||||
|
||||
Conditionally allowed during migration:
|
||||
|
||||
- `ControlServices -> HealthTelemetry`
|
||||
- `ControlServices -> RuntimeStore` only through temporary compatibility shims
|
||||
|
||||
Not allowed in the target design:
|
||||
|
||||
- `RenderEngine -> RuntimeStore`
|
||||
- `RenderEngine -> ControlServices`
|
||||
- `VideoBackend -> RuntimeStore`
|
||||
- `ControlServices -> RenderEngine` for direct mutation
|
||||
- `RuntimeStore -> RenderEngine`
|
||||
- `HealthTelemetry -> any subsystem` for control flow
|
||||
|
||||
The key principle is:
|
||||
|
||||
- store owns durable data
|
||||
- coordinator owns mutation policy
|
||||
- snapshot provider owns render-facing state publication
|
||||
- render owns live GPU execution
|
||||
- backend owns device timing
|
||||
- telemetry observes all of them
|
||||
|
||||
## State Ownership Model
|
||||
|
||||
The app has several different kinds of state, and Phase 1 should name them explicitly.
|
||||
|
||||
### Persisted State
|
||||
|
||||
Owned by `RuntimeStore`.
|
||||
|
||||
Examples:
|
||||
|
||||
- layer stack structure
|
||||
- selected shader ids
|
||||
- saved parameter values
|
||||
- runtime host config
|
||||
- stack presets
|
||||
|
||||
### Committed Live State
|
||||
|
||||
Logically owned by `RuntimeCoordinator`; physically held in `RuntimeStore` or in a live-state companion object, depending on the eventual implementation.
|
||||
|
||||
Examples:
|
||||
|
||||
- current operator-selected parameter values
|
||||
- current bypass state
|
||||
- current selected shader for each layer
|
||||
|
||||
This is state that should normally survive until explicitly changed and can be persisted if policy says so.
|
||||
|
||||
### Transient Live Overlay State
|
||||
|
||||
Owned by the subsystem that consumes it, not by the persisted store.
|
||||
|
||||
Examples:
|
||||
|
||||
- active OSC overlay targets while automation is flowing
|
||||
- shader feedback buffers
|
||||
- temporal history textures
|
||||
- queued input frames
|
||||
- in-flight preview state
|
||||
- playout queue state
|
||||
|
||||
This is where many current issues come from. The design rule is:
|
||||
|
||||
- transient state may influence output
|
||||
- transient state should not masquerade as persisted truth
|
||||
|
||||
### Health and Timing State
|
||||
|
||||
Owned by `HealthTelemetry`.
|
||||
|
||||
Examples:
|
||||
|
||||
- frame pacing stats
|
||||
- render timing
|
||||
- late/dropped frame counters
|
||||
- queue depths
|
||||
- warning states
|
||||
|
||||
## Target Runtime Flow
|
||||
|
||||
This section describes the intended long-term flow once later phases are in place.
|
||||
|
||||
### Control Mutation Flow
|
||||
|
||||
1. OSC/UI/file-watch input enters `ControlServices`.
|
||||
2. `ControlServices` normalizes it into an internal action or event.
|
||||
3. `RuntimeCoordinator` validates and classifies the action.
|
||||
4. If the action changes durable state, `RuntimeStore` is updated.
|
||||
5. If the action changes render-facing state, `RuntimeSnapshotProvider` publishes a new snapshot.
|
||||
6. If the action requires persistence, a persistence request is queued.
|
||||
7. Health/timing observations are emitted separately.
|
||||
|
||||
### Render Flow
|
||||
|
||||
1. `RenderEngine` consumes the latest published snapshot.
|
||||
2. `RenderEngine` combines that snapshot with render-local transient state.
|
||||
3. `RenderEngine` performs uploads, pass execution, feedback/history maintenance, and output production.
|
||||
4. `RenderEngine` produces:
|
||||
- preview-ready output
|
||||
- video-backend-ready output frames
|
||||
- render timing and warning signals
|
||||
|
||||
### Video Output Flow
|
||||
|
||||
Target long-term flow:
|
||||
|
||||
1. `RenderEngine` produces completed output frames ahead of demand.
|
||||
2. `VideoBackend` consumes those frames from a bounded queue or ring buffer.
|
||||
3. Device callbacks only drive dequeue/schedule/accounting behavior.
|
||||
4. `HealthTelemetry` records queue depth, lateness, underruns, and recovery events.
|
||||
|
||||
### Reload / Shader Rebuild Flow
|
||||
|
||||
1. file-watch or manual reload enters through `ControlServices`
|
||||
2. `RuntimeCoordinator` classifies the reload request
|
||||
3. `RuntimeStore` and shader/package metadata are refreshed if needed
|
||||
4. `RuntimeSnapshotProvider` republishes affected snapshot state
|
||||
5. `RenderEngine` rebuilds render-local resources from the new snapshot/build outputs
|
||||
|
||||
The important boundary here is that reload is not "a render concern that also touches persistence." It is a coordinated runtime concern with a render-local execution phase.
|
||||
|
||||
## Suggested Public Interfaces
|
||||
|
||||
These are not final class signatures, but they show the shape the architecture should move toward.
|
||||
|
||||
### `RuntimeStore`
|
||||
|
||||
Core responsibilities:
|
||||
|
||||
- `LoadConfig()`
|
||||
- `LoadPersistentState()`
|
||||
- `BuildPersistentStateSnapshot(...)`
|
||||
- `RequestPersistence(...)`
|
||||
- `GetStoredLayerStack()`
|
||||
- `SetStoredLayerStack(...)`
|
||||
- `GetStackPresetNames()`
|
||||
- `SaveStackPreset(...)`
|
||||
- `LoadStackPreset(...)`
|
||||
|
||||
### `RuntimeCoordinator`
|
||||
|
||||
Core responsibilities:
|
||||
|
||||
- `ApplyControlMutation(...)`
|
||||
- `ApplyAutomationTarget(...)`
|
||||
- `ResetLayer(...)`
|
||||
- `RequestReload(...)`
|
||||
- `CommitOverlayState(...)`
|
||||
- `PublishSnapshotIfNeeded()`
|
||||
- `RequestPersistenceIfNeeded()`
|
||||
|
||||
### `RuntimeSnapshotProvider`
|
||||
|
||||
Core responsibilities:
|
||||
|
||||
- `BuildSnapshot(...)`
|
||||
- `GetLatestSnapshot()`
|
||||
- `GetSnapshotVersion()`
|
||||
- `PublishSnapshot(...)`
|
||||
|
||||
### `ControlServices`
|
||||
|
||||
Core responsibilities:
|
||||
|
||||
- `StartOscIngress(...)`
|
||||
- `StartWebControlIngress(...)`
|
||||
- `StartFileWatchIngress(...)`
|
||||
- `EnqueueControlAction(...)`
|
||||
- `DrainServiceEvents(...)`
|
||||
|
||||
### `RenderEngine`
|
||||
|
||||
Core responsibilities:
|
||||
|
||||
- `StartRenderLoop(...)`
|
||||
- `ConsumeSnapshot(...)`
|
||||
- `EnqueueInputFrame(...)`
|
||||
- `ProduceOutputFrame(...)`
|
||||
- `ResetRenderLocalState(...)`
|
||||
- `HandleRebuildOutputs(...)`
|
||||
|
||||
### `VideoBackend`
|
||||
|
||||
Core responsibilities:
|
||||
|
||||
- `ConfigureInput(...)`
|
||||
- `ConfigureOutput(...)`
|
||||
- `StartPlayout(...)`
|
||||
- `StopPlayout(...)`
|
||||
- `ConsumeRenderedFrame(...)`
|
||||
- `ReportBackendState(...)`
|
||||
|
||||
### `HealthTelemetry`
|
||||
|
||||
Core responsibilities:
|
||||
|
||||
- `RecordTimingSample(...)`
|
||||
- `RecordCounterDelta(...)`
|
||||
- `RaiseWarning(...)`
|
||||
- `ClearWarning(...)`
|
||||
- `AppendLogEntry(...)`
|
||||
- `BuildHealthSnapshot()`
|
||||
|
||||
## Mapping From Current Code to Target Subsystems
|
||||
|
||||
This is not a one-to-one rename plan. It is a responsibility migration map.
|
||||
|
||||
### Previous `RuntimeHost`
|
||||
|
||||
Should eventually split across:
|
||||
|
||||
- `RuntimeStore`
|
||||
- `RuntimeCoordinator`
|
||||
- `RuntimeSnapshotProvider`
|
||||
- parts of `HealthTelemetry`
|
||||
|
||||
Likely examples:
|
||||
|
||||
- config loading/path resolution -> `RuntimeConfigStore`
|
||||
- persistent state saving -> `RuntimeStore`
|
||||
- layer stack mutation validation -> `RuntimeCoordinator`
|
||||
- render state building/versioning -> `RenderSnapshotBuilder`
|
||||
- render snapshot publication/cache -> `RuntimeSnapshotProvider`
|
||||
- timing/status setters -> `HealthTelemetry`
|
||||
|
||||
### Current `RuntimeServices`
|
||||
|
||||
Should eventually become mostly:
|
||||
|
||||
- `ControlServices`
|
||||
- a small service-hosting shell
|
||||
|
||||
Likely examples:
|
||||
|
||||
- OSC ingress/coalescing -> `ControlServices`
|
||||
- file-watch ingress -> `ControlServices`
|
||||
- deferred service coordination now done by polling -> split between `ControlServices` and event-driven coordinator calls
|
||||
|
||||
### Current `OpenGLComposite`
|
||||
|
||||
Should eventually split across:
|
||||
|
||||
- application bootstrap shell
|
||||
- `RenderEngine`
|
||||
- orchestration glue that wires subsystems together
|
||||
|
||||
Likely examples:
|
||||
|
||||
- render-pass facing code -> `RenderEngine`
|
||||
- app/service/backend bootstrap -> composition root
|
||||
- runtime mutation API surface -> coordinator-facing adapter, not render owner
|
||||
|
||||
### Current `OpenGLVideoIOBridge` and `DeckLinkSession`
|
||||
|
||||
Should eventually align more clearly under:
|
||||
|
||||
- `VideoBackend`
|
||||
- `RenderEngine`
|
||||
|
||||
Likely examples:
|
||||
|
||||
- device callback and scheduling policy -> `VideoBackend`
|
||||
- GL upload/readback/render work -> `RenderEngine`
|
||||
|
||||
## Architectural Guardrails
|
||||
|
||||
As later phases begin, these rules should be treated as guardrails.
|
||||
|
||||
### 1. No new cross-cutting runtime object should be introduced
|
||||
|
||||
If a new feature needs durable state, place it conceptually under `RuntimeStore`.
|
||||
If it needs render-local transient state, place it conceptually under `RenderEngine`.
|
||||
If it needs timing/status counters, place it conceptually under `HealthTelemetry`.
|
||||
|
||||
### 2. Render-local state should stay render-local
|
||||
|
||||
Do not push shader feedback, temporal history, preview caches, or playout queues back into the store just to make them easy to reach from other systems.
|
||||
|
||||
### 3. Device callbacks should not become a dumping ground for app work
|
||||
|
||||
Callback threads should converge toward signaling and queue management, not core rendering, persistence, or control mutation.
|
||||
|
||||
### 4. Persistence should not be used as a control synchronization mechanism
|
||||
|
||||
Saving state is not how subsystems discover changes. Published snapshots and explicit events should handle that.
|
||||
|
||||
### 5. Health reporting should observe, not coordinate
|
||||
|
||||
Telemetry systems may record warnings and degraded states, but they should not become the hidden control plane for the app.
|
||||
|
||||
## Migration Strategy
|
||||
|
||||
Phase 1 is a design phase, but it should support incremental migration.
|
||||
|
||||
Recommended order after this document:
|
||||
|
||||
1. Introduce names and interfaces before moving logic.
|
||||
2. Create compatibility adapters around the subsystem facades rather than forcing a flag day.
|
||||
3. Move read-only render snapshot publication out before moving all mutation logic.
|
||||
4. Move service ingress boundaries out before removing the old polling shell.
|
||||
5. Isolate timing/health setters from the core store as early as practical.
|
||||
|
||||
This keeps progress measurable while reducing rewrite risk.
|
||||
|
||||
## Suggested Deliverables for Completing Phase 1
|
||||
|
||||
Phase 1 can reasonably be considered complete once the project has:
|
||||
|
||||
- this subsystem-boundary design document
|
||||
- agreed subsystem names and responsibilities
|
||||
- agreed allowed dependency directions
|
||||
- explicit state categories: persisted, committed live, transient overlay, health/timing
|
||||
- a current-to-target responsibility map for runtime services, `OpenGLComposite`, and backend/render bridge code
|
||||
- a decision that later phases will build against this target rather than inventing new boundaries ad hoc
|
||||
|
||||
By that definition, Phase 1 is complete for runtime: the design package is complete, `RuntimeHost` is retired from the compiled runtime path, runtime seams are represented in code, and runtime subsystem tests cover the new boundaries. App-wide ownership work continues in later phases.
|
||||
|
||||
## Open Questions For Later Phases
|
||||
|
||||
These do not block Phase 1, but they should remain visible.
|
||||
|
||||
- Should shader package registry ownership live entirely in `RuntimeStore`, or should compile-ready derived registry data move into the snapshot provider?
|
||||
- Should committed live state be stored directly in `RuntimeStore`, or split into store plus live-session state owned by the coordinator?
|
||||
- How much of shader build orchestration belongs to `RenderEngine` versus a separate build service?
|
||||
- At what phase should preview become fully decoupled from playout cadence?
|
||||
- Should persistence become its own `PersistenceWriter` subsystem in Phase 6, or remain an implementation detail under `RuntimeStore`?
|
||||
|
||||
## Short Version
|
||||
|
||||
Phase 1 should establish one simple rule for the rest of the refactor:
|
||||
|
||||
- durable state lives in the store
|
||||
- mutation policy lives in the coordinator
|
||||
- render-facing state is published as snapshots
|
||||
- external control sources enter through services
|
||||
- GL work belongs to render
|
||||
- hardware pacing belongs to the backend
|
||||
- health visibility belongs to telemetry
|
||||
|
||||
If later phases keep to that rule, the architecture will become materially more resilient without needing another round of foundational boundary changes.
|
||||
@@ -1,660 +0,0 @@
|
||||
# Phase 2 Design: Internal Event Model
|
||||
|
||||
This document expands Phase 2 of [ARCHITECTURE_RESILIENCE_REVIEW.md](ARCHITECTURE_RESILIENCE_REVIEW.md) into a concrete design target.
|
||||
|
||||
Phase 1 established the subsystem vocabulary and moved the runtime path behind clearer collaborators. Phase 2 should now give those subsystems a safer way to coordinate than direct cross-calls, shared mutable result queues, and coarse polling loops.
|
||||
|
||||
## Status
|
||||
|
||||
- Phase 2 design package: accepted.
|
||||
- Phase 2 implementation: substantially complete for the coordination substrate.
|
||||
- Current alignment: the typed event substrate, app-owned dispatcher, coalesced app pump, reload bridge events, production bridges, and event behavior tests are in place. Remaining items are narrow follow-ups rather than foundation work.
|
||||
|
||||
The current repo now has concrete Phase 2 implementation footholds:
|
||||
|
||||
- `RuntimeEventType`, typed payload structs, `RuntimeEvent`, `RuntimeEventQueue`, `RuntimeEventDispatcher`, and `RuntimeEventCoalescingQueue` define the event substrate.
|
||||
- `OpenGLComposite` owns one app-level `RuntimeEventDispatcher` and passes it into `RuntimeServices`, `RuntimeCoordinator`, `RuntimeUpdateController`, `RuntimeSnapshotProvider`, `ShaderBuildQueue`, and `VideoBackend`.
|
||||
- `ControlServices` publishes typed OSC and runtime-state broadcast events and uses condition-variable wakeups with a fallback compatibility timer.
|
||||
- `RuntimeCoordinator` publishes accepted, rejected, state-changed, persistence, reload, shader-build, and compile-status follow-up events.
|
||||
- `RuntimeUpdateController` subscribes to event families for broadcast, shader build, compile status, render reset, and dispatcher health observations.
|
||||
- `RuntimeSnapshotProvider` publishes render snapshot request/published events.
|
||||
- `ShaderBuildQueue` and `RuntimeUpdateController` publish shader build lifecycle events with generation matching.
|
||||
- `VideoBackend` publishes backend observation events and timing samples.
|
||||
- `HealthTelemetry` receives dispatcher metrics directly and the event vocabulary now includes health observation events.
|
||||
- Tests cover event type stability, payload mapping, FIFO dispatch, coalescing infrastructure, app-level coalesced broadcast/build behavior, handler failures, mutation follow-up behavior, reload bridge behavior, and shader-build generation behavior.
|
||||
|
||||
The implementation is now established in the repo. The remaining Phase 2 follow-up work is small: add completion/failure observations where useful and keep the runtime-store poll fallback explicitly transitional until a later file-watch implementation replaces it.
|
||||
|
||||
## Why Phase 2 Exists
|
||||
|
||||
The resilience review originally called out three timing and ownership problems that an event model could directly improve:
|
||||
|
||||
- background service timing relied on coarse sleeps and polling
|
||||
- control, reload, persistence, and render-update work traveled through mixed shared state and result queues
|
||||
- later render/backend refactors need a stable coordination model before they move more work across threads
|
||||
|
||||
The goal is not to make the app fully asynchronous in one pass. It is to introduce typed internal events so each subsystem can publish what happened without knowing who will react or how many downstream effects are needed.
|
||||
|
||||
## Goals
|
||||
|
||||
Phase 2 should establish:
|
||||
|
||||
- a small typed event vocabulary for control, runtime, render, backend, persistence, and health coordination
|
||||
- one app-owned event pump or dispatcher that can route events deterministically
|
||||
- bounded queues with clear ownership and no unbounded background growth
|
||||
- wakeup-driven service coordination where practical, replacing coarse polling as the default shape
|
||||
- explicit event-to-command boundaries so events do not become hidden global mutation APIs
|
||||
- tests for event ordering, coalescing, rejection, and dispatch side effects
|
||||
|
||||
## Non-Goals
|
||||
|
||||
Phase 2 should not require:
|
||||
|
||||
- a dedicated render thread yet
|
||||
- a full actor system
|
||||
- lock-free queues everywhere
|
||||
- background persistence implementation
|
||||
- a complete DeckLink state machine
|
||||
- final live-state layering
|
||||
- replacing every direct call in one change
|
||||
|
||||
Those are later phases. Phase 2 provides the coordination substrate they can build on.
|
||||
|
||||
## Current Coordination Shape
|
||||
|
||||
The current runtime is much cleaner than before Phase 1, and Phase 2 has moved the main coordination model toward typed publication and app-owned dispatch:
|
||||
|
||||
- `ControlServices` publishes OSC value, OSC commit, and runtime-state broadcast events.
|
||||
- `ControlServices::PollLoop(...)` is wakeup-driven for queued OSC commit work, with a bounded fallback timer for compatibility polling.
|
||||
- `RuntimeCoordinator` still returns `RuntimeCoordinatorResult` for synchronous callers, but also publishes accepted/rejected/follow-up events.
|
||||
- `RuntimeUpdateController` subscribes to event families and applies many effects from events rather than only from drained result objects.
|
||||
- shader-build request, readiness, failure, and application are represented by typed events.
|
||||
- render snapshot publication and backend observations are represented by typed events.
|
||||
- dispatcher queue metrics and handler failures feed telemetry and health observation events.
|
||||
|
||||
There is still transitional bridge state:
|
||||
|
||||
- `ControlServices` still exposes completed OSC commit notifications for render overlay settlement.
|
||||
- `RuntimeEventCoalescingQueue` is now wired into the app-owned dispatcher for latest-value event types.
|
||||
- `FileChangeDetected` and `ManualReloadRequested` are now published as reload ingress bridge events before coordinator reload follow-ups.
|
||||
- runtime-state broadcast completion/failure events are still a target, not current behavior.
|
||||
|
||||
That means Phase 2 is complete enough as the coordination substrate for later phases. The remaining items are refinement work and should not block moving to render ownership, live-state layering, or persistence work.
|
||||
|
||||
## Event Model Principles
|
||||
|
||||
### Events say what happened
|
||||
|
||||
Events should describe facts:
|
||||
|
||||
- `OscValueReceived`
|
||||
- `RuntimeMutationAccepted`
|
||||
- `RuntimeMutationRejected`
|
||||
- `ShaderReloadRequested`
|
||||
- `ShaderBuildPrepared`
|
||||
- `ShaderBuildFailed`
|
||||
- `RenderSnapshotPublished`
|
||||
- `RuntimeStateBroadcastRequested`
|
||||
|
||||
They should not be vague commands like "do everything needed now."
|
||||
|
||||
### Commands request intent
|
||||
|
||||
Some work is still naturally command-shaped:
|
||||
|
||||
- "apply this parameter mutation"
|
||||
- "request shader reload"
|
||||
- "save this stack preset"
|
||||
- "start backend output"
|
||||
|
||||
Commands enter an owner subsystem. Events leave a subsystem after the owner has accepted, rejected, or completed work.
|
||||
|
||||
### One owner mutates each state category
|
||||
|
||||
Events must not become a way to bypass Phase 1 ownership:
|
||||
|
||||
- `RuntimeCoordinator` remains the owner of mutation policy.
|
||||
- `RuntimeStore` remains the owner of durable state.
|
||||
- `RuntimeSnapshotProvider` remains the owner of render snapshot publication.
|
||||
- `RenderEngine` remains the owner of render-local transient state.
|
||||
- `VideoBackend` remains the owner of device lifecycle and pacing.
|
||||
- `HealthTelemetry` observes and reports, but does not coordinate behavior.
|
||||
|
||||
### Event handlers should be small
|
||||
|
||||
Handlers should translate events into owner calls or follow-up events. They should not accumulate hidden long-lived state unless that state belongs to the handler's subsystem.
|
||||
|
||||
### Queues must be bounded or coalesced
|
||||
|
||||
High-rate control traffic can arrive faster than the app should process every individual sample. Phase 2 should preserve the useful current behavior of coalescing OSC updates by route, but make the coalescing policy explicit.
|
||||
|
||||
## Event Families
|
||||
|
||||
### Control Events
|
||||
|
||||
Produced by `ControlServices`.
|
||||
|
||||
Examples:
|
||||
|
||||
- `OscValueReceived`
|
||||
- `OscValueCoalesced`
|
||||
- `OscCommitRequested`
|
||||
- `HttpControlMutationRequested`
|
||||
- `WebSocketClientConnected`
|
||||
- `RuntimeStateBroadcastRequested`
|
||||
- `FileChangeDetected`
|
||||
- `ManualReloadRequested`
|
||||
|
||||
Primary consumers:
|
||||
|
||||
- `RuntimeCoordinator`
|
||||
- `HealthTelemetry`
|
||||
- later, a persistence writer or diagnostics publisher
|
||||
|
||||
### Runtime Events
|
||||
|
||||
Produced by `RuntimeCoordinator`, `RuntimeStore`, and snapshot publication code.
|
||||
|
||||
Examples:
|
||||
|
||||
- `RuntimeMutationAccepted`
|
||||
- `RuntimeMutationRejected`
|
||||
- `RuntimeStateChanged`
|
||||
- `RuntimePersistenceRequested`
|
||||
- `RuntimeReloadRequested`
|
||||
- `ShaderPackagesChanged`
|
||||
- `RenderSnapshotPublishRequested`
|
||||
- `RuntimeStatePresentationChanged`
|
||||
|
||||
Primary consumers:
|
||||
|
||||
- `RuntimeSnapshotProvider`
|
||||
- `RenderEngine`
|
||||
- `ControlServices`
|
||||
- `HealthTelemetry`
|
||||
- later, `PersistenceWriter`
|
||||
|
||||
### Shader Build Events
|
||||
|
||||
Produced by shader build orchestration and render-side build application.
|
||||
|
||||
Examples:
|
||||
|
||||
- `ShaderBuildRequested`
|
||||
- `ShaderBuildPrepared`
|
||||
- `ShaderBuildApplied`
|
||||
- `ShaderBuildFailed`
|
||||
- `CompileStatusChanged`
|
||||
|
||||
Primary consumers:
|
||||
|
||||
- `RenderEngine`
|
||||
- `RuntimeCoordinator`
|
||||
- `ControlServices`
|
||||
- `HealthTelemetry`
|
||||
|
||||
### Render Events
|
||||
|
||||
Produced by `RenderEngine` and `RuntimeSnapshotProvider`.
|
||||
|
||||
Examples:
|
||||
|
||||
- `RenderSnapshotPublished`
|
||||
- `RenderResetRequested`
|
||||
- `RenderResetApplied`
|
||||
- `OscOverlayApplied`
|
||||
- `OscOverlaySettled`
|
||||
- `FrameRendered`
|
||||
- `PreviewFrameAvailable`
|
||||
|
||||
Primary consumers:
|
||||
|
||||
- `RenderEngine`
|
||||
- `ControlServices`
|
||||
- `VideoBackend`
|
||||
- `HealthTelemetry`
|
||||
|
||||
### Backend Events
|
||||
|
||||
Produced by `VideoBackend` and backend adapters.
|
||||
|
||||
Examples:
|
||||
|
||||
- `InputSignalChanged`
|
||||
- `InputFrameArrived`
|
||||
- `OutputFrameScheduled`
|
||||
- `OutputFrameCompleted`
|
||||
- `OutputLateFrameDetected`
|
||||
- `OutputDroppedFrameDetected`
|
||||
- `BackendStateChanged`
|
||||
|
||||
Primary consumers:
|
||||
|
||||
- `RenderEngine`
|
||||
- `HealthTelemetry`
|
||||
- later, backend lifecycle state machine handlers
|
||||
|
||||
### Health Events
|
||||
|
||||
Produced by all major subsystems.
|
||||
|
||||
Examples:
|
||||
|
||||
- `SubsystemWarningRaised`
|
||||
- `SubsystemWarningCleared`
|
||||
- `SubsystemRecovered`
|
||||
- `TimingSampleRecorded`
|
||||
- `QueueDepthChanged`
|
||||
|
||||
Primary consumer:
|
||||
|
||||
- `HealthTelemetry`
|
||||
|
||||
Health events should be observational. They should not be required for core behavior to proceed.
|
||||
|
||||
## Event Envelope
|
||||
|
||||
A practical initial event envelope can stay simple:
|
||||
|
||||
```cpp
|
||||
enum class RuntimeEventType
|
||||
{
|
||||
OscCommitRequested,
|
||||
RuntimeMutationAccepted,
|
||||
RuntimeMutationRejected,
|
||||
RuntimeReloadRequested,
|
||||
ShaderBuildRequested,
|
||||
ShaderBuildPrepared,
|
||||
ShaderBuildFailed,
|
||||
RenderSnapshotPublishRequested,
|
||||
RenderSnapshotPublished,
|
||||
RuntimeStateBroadcastRequested,
|
||||
BackendStateChanged,
|
||||
SubsystemWarningRaised
|
||||
};
|
||||
|
||||
struct RuntimeEvent
|
||||
{
|
||||
RuntimeEventType type;
|
||||
uint64_t sequence = 0;
|
||||
std::chrono::steady_clock::time_point createdAt;
|
||||
std::string source;
|
||||
std::variant<
|
||||
OscCommitRequestedEvent,
|
||||
RuntimeMutationEvent,
|
||||
ShaderBuildEvent,
|
||||
RenderSnapshotEvent,
|
||||
BackendEvent,
|
||||
HealthEvent> payload;
|
||||
};
|
||||
```
|
||||
|
||||
The exact C++ names can change. The key design requirements are:
|
||||
|
||||
- event type is explicit
|
||||
- event order is observable
|
||||
- source subsystem is recorded
|
||||
- payload is typed, not a bag of optional strings
|
||||
- timestamps exist for queue-age telemetry
|
||||
- failures are events too, not just debug strings
|
||||
|
||||
## Event Bus Shape
|
||||
|
||||
Phase 2 does not need a large framework. A small app-owned dispatcher is enough.
|
||||
|
||||
Suggested components:
|
||||
|
||||
- `RuntimeEventDispatcher`
|
||||
- owns queues
|
||||
- assigns sequence numbers
|
||||
- exposes `Publish(...)`
|
||||
- exposes `DispatchPending(...)`
|
||||
- event handlers
|
||||
- narrow handler interface or function callback
|
||||
- registered by subsystem/composition root
|
||||
- `RuntimeEventQueue`
|
||||
- bounded FIFO for ordinary events
|
||||
- `RuntimeEventCoalescingQueue`
|
||||
- bounded keyed latest-value queue for flows such as high-rate OSC, broadcast requests, file/reload bursts, and queue-depth telemetry
|
||||
- queue and dispatch metrics
|
||||
- queue depth
|
||||
- oldest event age
|
||||
- dropped/coalesced counts
|
||||
|
||||
Initial implementation is single-process and mostly single-dispatch-thread. The important part is that event publication and event handling are explicit.
|
||||
|
||||
### Dispatcher Ownership Decision
|
||||
|
||||
The first concrete implementation uses one app-owned `RuntimeEventDispatcher`.
|
||||
|
||||
Ownership:
|
||||
|
||||
- `OpenGLComposite` owns the dispatcher as part of the current composition root.
|
||||
|
||||
References:
|
||||
|
||||
- `RuntimeServices` receives the dispatcher and passes it to `ControlServices`.
|
||||
- `RuntimeCoordinator` receives the dispatcher so coordinator outcomes can become explicit events.
|
||||
- `RuntimeUpdateController` receives the dispatcher so it can become the first effect/apply handler.
|
||||
- `RuntimeSnapshotProvider`, `ShaderBuildQueue`, and `VideoBackend` receive the dispatcher so snapshot, shader lifecycle, and backend observation events are visible.
|
||||
|
||||
This is intentionally a composition-root dependency, not a new subsystem dependency. Subsystems should not construct their own dispatchers, and future tests should use `RuntimeEventTestHarness` rather than creating ad hoc event plumbing.
|
||||
|
||||
The dispatcher should move out of `OpenGLComposite` only if a later application-shell/composition-root object replaces `OpenGLComposite` as the owner of subsystem wiring.
|
||||
|
||||
## Queue Policy
|
||||
|
||||
Not every event deserves the same queue semantics.
|
||||
|
||||
### FIFO Events
|
||||
|
||||
Use FIFO for events where every item matters:
|
||||
|
||||
- mutation accepted/rejected
|
||||
- shader build completed/failed
|
||||
- backend state changed
|
||||
- warning raised/cleared
|
||||
|
||||
### Coalesced Events
|
||||
|
||||
Use coalescing for high-rate latest-value flows:
|
||||
|
||||
- OSC parameter target updates by route
|
||||
- runtime-state broadcast requests
|
||||
- file-change reload requests during a burst
|
||||
- queue-depth telemetry
|
||||
|
||||
Coalesced events should record how many updates were collapsed so telemetry can show pressure.
|
||||
|
||||
### Synchronous Boundaries
|
||||
|
||||
Some calls may remain synchronous during Phase 2:
|
||||
|
||||
- UI/API mutation calls that need an immediate success/error response
|
||||
- startup configuration failures
|
||||
- shutdown ordering
|
||||
- tests
|
||||
|
||||
The rule is that synchronous calls should still publish events for accepted/rejected/completed work, so the rest of the app does not need to infer side effects from the call path.
|
||||
|
||||
## Event Bridge Policy
|
||||
|
||||
This section is the implementation rulebook for converting existing direct calls and result queues into events. Future Phase 2 lanes should use this table unless they deliberately update the policy here first.
|
||||
|
||||
### Bridge Categories
|
||||
|
||||
| Bridge category | Use when | Queue shape | Handler expectation |
|
||||
| --- | --- | --- | --- |
|
||||
| `fifo-fact` | every occurrence matters and must be observed in order | bounded FIFO | handler consumes each event exactly once |
|
||||
| `coalesced-latest` | only the latest value per key matters | bounded coalescing queue | handler consumes the latest event and telemetry records collapsed count |
|
||||
| `sync-command-with-event` | caller needs an immediate success/error result | direct owner call plus follow-up event publication | handler must not be required for the caller's response |
|
||||
| `observation-only` | event is telemetry/diagnostic and must not drive core behavior | FIFO or coalesced depending on rate | handler failure must never block app behavior |
|
||||
| `compatibility-poll` | source cannot yet publish an event directly | temporary poll adapter publishes typed events | poll interval is wakeup-driven with a fallback timer until a later file-watch implementation replaces it |
|
||||
|
||||
### Current Bridge Decisions
|
||||
|
||||
| Current flow | Phase 2 bridge | Event(s) | Current status |
|
||||
| --- | --- | --- | --- |
|
||||
| OSC latest-value updates | `ControlServices` ingress bridge | `OscValueReceived`, optional `OscValueCoalesced` | Event publication exists; source-side pending map and app-level dispatcher coalescing both provide latest-value behavior. |
|
||||
| OSC commit after settle | `ControlServices -> RuntimeCoordinator` bridge | `OscCommitRequested`, then `RuntimeMutationAccepted` or `RuntimeMutationRejected` | Event publication exists. Coordinator follow-up work now reaches the app path through events rather than a service-result queue. |
|
||||
| HTTP/UI mutation needing response | direct call into `RuntimeCoordinator` | `RuntimeMutationAccepted` or `RuntimeMutationRejected` after the synchronous response path | Implemented as `sync-command-with-event`; synchronous response remains supported. |
|
||||
| runtime-state broadcast request | presentation/broadcast bridge | `RuntimeStatePresentationChanged`, `RuntimeStateBroadcastRequested` | Request event exists, is handled, and is coalesced by the app dispatcher. Completion/failure events remain follow-ups. |
|
||||
| manual reload button | control ingress bridge | `ManualReloadRequested`, then `RuntimeReloadRequested` | Ingress and follow-up events exist and are covered by tests. |
|
||||
| file watcher changes | file-watch bridge | `FileChangeDetected`, then `RuntimeReloadRequested` | Poll fallback remains, but detected changes now publish ingress and follow-up events and are covered by tests. |
|
||||
| runtime store poll fallback | compatibility poll adapter | `FileChangeDetected`, `RuntimeReloadRequested`, or warning/compile-status event | Still present by design as a transitional bridge with a condition-variable fallback timer. Detected changes publish ingress and follow-up events. |
|
||||
| shader build request | runtime/render bridge | `ShaderBuildRequested` | Event publication, handler, and app dispatcher coalescing exist. |
|
||||
| shader build ready/failure/apply | shader build lifecycle bridge | `ShaderBuildPrepared`, `ShaderBuildFailed`, `ShaderBuildApplied`, `CompileStatusChanged` | Implemented with generation matching. |
|
||||
| render snapshot publication | snapshot bridge | `RenderSnapshotPublishRequested`, `RenderSnapshotPublished` | Implemented. Publish requests are coalesced by output dimensions in the app dispatcher. |
|
||||
| render reset request/application | render bridge | `RenderResetRequested`, `RenderResetApplied` | Request handling exists; applied event coverage can be expanded in later render work. |
|
||||
| input signal changes | backend observation bridge | `InputSignalChanged` | Implemented as backend observation publication. |
|
||||
| output late/dropped/completed frames | backend timing bridge | `OutputFrameCompleted`, `OutputLateFrameDetected`, `OutputDroppedFrameDetected` | Implemented at the vocabulary and backend publication level. High-rate policy may be refined during backend lifecycle work. |
|
||||
| warnings and recovery | telemetry bridge | `SubsystemWarningRaised`, `SubsystemWarningCleared`, `SubsystemRecovered` | Vocabulary exists; direct telemetry writes still coexist with event observations. |
|
||||
| queue depth/timing samples | telemetry metrics bridge | `QueueDepthChanged`, `TimingSampleRecorded` | Implemented for dispatcher/backend observations and coalesced by metric key in the app dispatcher. |
|
||||
|
||||
### Bridge Rules
|
||||
|
||||
- A bridge may translate an old direct call into an owner command, but it must publish the accepted/rejected/completed event that describes the outcome.
|
||||
- A bridge must not mutate state owned by another subsystem just because it handles that subsystem's event.
|
||||
- A coalesced event must have a stable key in code and a documented policy here.
|
||||
- A FIFO event should be cheap enough that retaining every occurrence is useful. If not, turn it into a coalesced metric before putting it on a hot path.
|
||||
- A synchronous bridge must treat event publication as a side effect of the owner decision, not as the mechanism that produces the direct caller's response.
|
||||
- A compatibility poll adapter should be named as temporary in code so it does not become the new long-term coordination model.
|
||||
- Handler failure should be reported through telemetry and dispatch metrics. It should not throw back across subsystem boundaries.
|
||||
|
||||
### First Integration Recommendation
|
||||
|
||||
The safest first behavior-changing bridge is `RuntimeStateBroadcastRequested`.
|
||||
|
||||
It is low risk because:
|
||||
|
||||
- it is already a side effect of many coordinator outcomes
|
||||
- duplicate requests are naturally coalescable
|
||||
- the handler can call the existing `ControlServices::BroadcastState()` path
|
||||
- success can be verified through existing UI behavior and event tests
|
||||
|
||||
After that, the next bridge should be `ShaderBuildRequested`, because it already behaves like a queued side effect and has clear follow-up events.
|
||||
|
||||
## Target Flow Examples
|
||||
|
||||
### OSC Parameter Update
|
||||
|
||||
1. `OscServer` decodes a packet.
|
||||
2. `ControlServices` publishes or coalesces `OscValueReceived`.
|
||||
3. The dispatcher routes the event to the render-overlay path or coordinator policy, depending on whether the value is transient or committing.
|
||||
4. `RuntimeCoordinator` publishes `RuntimeMutationAccepted` or `RuntimeMutationRejected` for committed changes.
|
||||
5. Accepted committed changes publish `RenderSnapshotPublishRequested` and `RuntimePersistenceRequested` as needed.
|
||||
6. `ControlServices` receives `RuntimeStateBroadcastRequested` or a presentation-changed event and broadcasts at its own cadence.
|
||||
|
||||
### File Reload
|
||||
|
||||
1. File-watch or manual reload produces `FileChangeDetected` or `ManualReloadRequested`.
|
||||
2. `ControlServices` coalesces reload bursts into one `RuntimeReloadRequested`.
|
||||
3. `RuntimeCoordinator` classifies the reload.
|
||||
4. Package/store refresh produces `ShaderPackagesChanged` if package metadata changed.
|
||||
5. Coordinator publishes `ShaderBuildRequested`.
|
||||
6. Shader build completion publishes `ShaderBuildPrepared` or `ShaderBuildFailed`.
|
||||
7. Render applies the ready build and publishes `ShaderBuildApplied`.
|
||||
|
||||
### Runtime State Broadcast
|
||||
|
||||
1. A mutation or reload publishes `RuntimeStatePresentationChanged`.
|
||||
2. `ControlServices` coalesces this into a broadcast request.
|
||||
3. The broadcast path asks `RuntimeStatePresenter` for the current presentation read model.
|
||||
4. `HealthTelemetry` records broadcast count, failures, and queue age.
|
||||
|
||||
### Backend Signal Change
|
||||
|
||||
1. Backend adapter detects input signal change.
|
||||
2. `VideoBackend` publishes `InputSignalChanged`.
|
||||
3. `HealthTelemetry` records the new signal status.
|
||||
4. Later phases may let the backend lifecycle state machine react to the same event.
|
||||
|
||||
## Migration Plan
|
||||
|
||||
### Step 1. Add Event Types And A Minimal Dispatcher
|
||||
|
||||
Status: complete.
|
||||
|
||||
Introduce:
|
||||
|
||||
- `RuntimeEvent`
|
||||
- `RuntimeEventType`
|
||||
- typed payload structs for the smallest useful event family
|
||||
- `RuntimeEventBus` or equivalent dispatcher
|
||||
|
||||
Start with events that do not change behavior:
|
||||
|
||||
- `RuntimeStateBroadcastRequested`
|
||||
- `ShaderBuildRequested`
|
||||
- `RuntimeMutationRejected`
|
||||
- simple health/log observations
|
||||
|
||||
### Step 2. Convert `RuntimeUpdateController` Into An Event Handler
|
||||
|
||||
Status: complete for the Phase 2 target, with synchronous API helpers retained.
|
||||
|
||||
`RuntimeUpdateController` is already close to an event effect applier. Phase 2 should narrow it into a handler for:
|
||||
|
||||
- coordinator outcome events
|
||||
- shader build readiness events
|
||||
- snapshot publication requests
|
||||
- broadcast requests
|
||||
|
||||
The class should stop being the place that polls every source of work.
|
||||
|
||||
Current note: `RuntimeUpdateController` now subscribes to the dispatcher and handles broadcast, reload, shader build, compile status, render reset, and health observation paths. It still accepts synchronous `RuntimeCoordinatorResult` values for UI/API calls that need immediate success or error responses.
|
||||
|
||||
### Step 3. Replace `ControlServices::PollLoop` Sleep With Wakeups
|
||||
|
||||
Status: complete for OSC commit wakeups; runtime-store compatibility polling remains explicitly transitional.
|
||||
|
||||
Keep coalescing, but replace the fixed `25 x Sleep(10)` cadence with:
|
||||
|
||||
- a condition variable or waitable event
|
||||
- wakeups when OSC commit work arrives
|
||||
- wakeups when file/reload work arrives
|
||||
- a fallback timer only for compatibility polling that cannot yet be evented
|
||||
|
||||
This is the most direct Phase 2 timing win.
|
||||
|
||||
Current note: `ControlServices` now uses a condition variable and fallback timer. The fallback exists for runtime-store polling until a later file-watch implementation can replace scanning as the change source. Detected reload/file changes publish typed ingress and follow-up events.
|
||||
|
||||
### Step 4. Route Shader Build Lifecycle Through Events
|
||||
|
||||
Status: mostly complete.
|
||||
|
||||
Turn the current request/apply/failure/success path into explicit events:
|
||||
|
||||
- `ShaderBuildRequested`
|
||||
- `ShaderBuildPrepared`
|
||||
- `ShaderBuildFailed`
|
||||
- `ShaderBuildApplied`
|
||||
- `CompileStatusChanged`
|
||||
|
||||
This should preserve the current off-frame-path compile behavior while making readiness visible.
|
||||
|
||||
Current note: request, prepared, failed, applied, and compile-status events exist. Generation-aware consumption is covered by tests. Request events are coalesced by build dimensions and preserve-feedback policy in the app dispatcher.
|
||||
|
||||
### Step 5. Route Runtime Broadcasts Through Events
|
||||
|
||||
Status: partially complete.
|
||||
|
||||
Replace direct "broadcast now" decisions with:
|
||||
|
||||
- `RuntimeStatePresentationChanged`
|
||||
- `RuntimeStateBroadcastRequested`
|
||||
- `RuntimeStateBroadcastCompleted`
|
||||
- `RuntimeStateBroadcastFailed`
|
||||
|
||||
This keeps UI delivery in `ControlServices` while keeping presentation ownership in the runtime presentation layer.
|
||||
|
||||
Current note: `RuntimeStateBroadcastRequested` exists, is coalesced by the app dispatcher, and is handled. Broadcast completion/failure events have not been added yet.
|
||||
|
||||
### Step 6. Add Event Metrics
|
||||
|
||||
Status: mostly complete for dispatcher metrics; broader health-event observation continues.
|
||||
|
||||
Before using the event system for hotter paths, add metrics:
|
||||
|
||||
- event queue depth
|
||||
- oldest event age
|
||||
- event dispatch duration
|
||||
- coalesced event count
|
||||
- dropped event count
|
||||
- handler failure count
|
||||
|
||||
These should feed `HealthTelemetry`.
|
||||
|
||||
Current note: queue depth, oldest-event age, dispatch duration, dropped count, coalesced count, and handler failure counts feed telemetry. Queue/timing events are also published and coalesced by metric key.
|
||||
|
||||
## Dependency Rules
|
||||
|
||||
Allowed:
|
||||
|
||||
- producers publish events to the bus
|
||||
- the composition root registers handlers
|
||||
- handlers call owner subsystem APIs
|
||||
- `HealthTelemetry` observes event metrics and failures
|
||||
|
||||
Avoid:
|
||||
|
||||
- subsystems subscribing directly to each other in constructors
|
||||
- event handlers mutating state outside their owner subsystem
|
||||
- using one global event payload with many nullable fields
|
||||
- making render hot paths block on the event bus
|
||||
- requiring health/telemetry event delivery for core behavior
|
||||
|
||||
The dispatcher is coordination infrastructure, not a new domain owner.
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
Phase 2 should add tests that do not require GL, DeckLink, or network sockets.
|
||||
|
||||
Implemented tests:
|
||||
|
||||
- FIFO events dispatch in sequence order
|
||||
- coalesced events keep the latest payload and count collapsed updates
|
||||
- rejected mutations publish rejection events without downstream snapshot/build events
|
||||
- accepted parameter mutations publish the expected follow-up event set
|
||||
- handler failures are reported as health/log events
|
||||
- queue depth and oldest-event-age metrics update predictably
|
||||
- typed payload mapping covers persistence, render snapshot, backend, timing, queue-depth, and late/dropped output-frame events
|
||||
- shader build generation matching applies only the expected prepared build
|
||||
|
||||
Remaining useful tests before deeper file-watch work:
|
||||
|
||||
- file reload bursts collapse into one reload request across a real poll burst
|
||||
- broadcast completion/failure events are observable once those payloads exist
|
||||
|
||||
The existing `RuntimeEventTypeTests` target is now the main pure event behavior harness. `RuntimeEventTestHarness` should remain the shared test helper so future lanes do not invent their own dispatcher plumbing.
|
||||
|
||||
## Phase 2 Exit Criteria
|
||||
|
||||
Phase 2 can be considered complete once the project can say:
|
||||
|
||||
- [x] there is a typed internal event envelope and dispatcher
|
||||
- [x] `OpenGLComposite` owns the dispatcher as the current composition root
|
||||
- [x] `ControlServices` emits typed events for OSC commits and broadcast requests
|
||||
- [x] reload/file-change work publishes typed ingress and follow-up events
|
||||
- [x] `RuntimeCoordinator` publishes explicit accepted/rejected/follow-up events
|
||||
- [x] callers no longer need broad compatibility result queues for normal runtime side effects
|
||||
- [x] `RuntimeUpdateController` handles event-driven broadcast, shader build, compile status, render reset, and health observation paths
|
||||
- [x] `RuntimeUpdateController` no longer needs compatibility result draining for ordinary service work
|
||||
- [x] shader build request/readiness/failure/application is represented as events
|
||||
- [x] shader build requests are coalesced by dimensions and preserve-feedback policy in the app path
|
||||
- [x] render snapshot publication is represented as request/published events
|
||||
- [x] render snapshot publish requests are coalesced in the app path where needed
|
||||
- [x] backend observations publish typed events
|
||||
- [x] event queues expose depth, age, dropped, coalescing, and failure metrics
|
||||
- [x] production event paths use coalescing for broadcast requests, shader-build requests, and high-rate metrics
|
||||
- [x] coarse sleep polling is no longer the default coordination model for OSC commit service work
|
||||
- [x] runtime-store/file-change compatibility polling is explicitly contained and publishes event-first reload bridge events when changes are detected
|
||||
|
||||
Phase 2 closure note:
|
||||
|
||||
- The checklist above is complete for the internal event model substrate.
|
||||
- Broadcast completion/failure events and real file-watch burst tests are useful follow-ups, but they are no longer foundation blockers.
|
||||
- `RuntimeCoordinatorResult` may remain as a synchronous return type for command APIs; the Phase 2 requirement is that accepted/rejected/follow-up behavior is also published as typed events, which is now true.
|
||||
|
||||
## Open Questions For Implementation
|
||||
|
||||
- Resolved: the first dispatcher is single-process, app-owned, and pumped through the current app/update path.
|
||||
- Resolved: event payloads use typed structs carried by `std::variant`.
|
||||
- Resolved: persistence requests are represented in Phase 2 even though background persistence lands later.
|
||||
- Resolved: backend callback events are introduced now as observation-only events.
|
||||
- Still open: should high-rate OSC transient overlay events enter the app dispatcher, or should they remain source-local until the live-state layering phase?
|
||||
- Resolved for Phase 2: `RuntimeCoordinatorResult` can survive as a synchronous helper for command APIs, as long as event publication remains the coordination path for downstream effects.
|
||||
- Resolved: app-level coalescing lives inside `RuntimeEventDispatcher`; source-specific bridges can still coalesce before publication when they own useful domain-specific collapse policy.
|
||||
|
||||
## Short Version
|
||||
|
||||
Phase 2 should give the app a typed nervous system.
|
||||
|
||||
- external inputs become typed events
|
||||
- owner subsystems still make decisions
|
||||
- decisions publish explicit outcomes
|
||||
- follow-up work is routed by handlers, not inferred from scattered call paths
|
||||
- high-rate work is bounded or coalesced
|
||||
- timing and queue pressure become observable
|
||||
|
||||
If this boundary holds, later render-thread, persistence, backend, and telemetry work can move independently without returning to shared-object polling as the default coordination model.
|
||||
@@ -1,383 +0,0 @@
|
||||
# Phase 3 Design: Live State And Service Coordination
|
||||
|
||||
This document expands Phase 3 of [ARCHITECTURE_RESILIENCE_REVIEW.md](./ARCHITECTURE_RESILIENCE_REVIEW.md) into a concrete design target.
|
||||
|
||||
Phase 1 split runtime responsibilities into named subsystems. Phase 2 added the typed internal event model those subsystems can coordinate through. Phase 3 should now finish the service-facing and live-state cleanup needed before the app attempts sole-owner GL rendering.
|
||||
|
||||
## Status
|
||||
|
||||
- Phase 3 design package: accepted.
|
||||
- Phase 3 implementation: exit criteria satisfied for the current architecture.
|
||||
- Current alignment: the repo now has the live-state/composer building blocks, a service bridge, and a named frame-state handoff. `OpenGLComposite::renderEffect()` remains the app-level frame entrypoint, but the service drain, layer-state resolution, and OSC commit handoff now sit behind named helpers and frame-state data.
|
||||
|
||||
Current footholds:
|
||||
|
||||
- `RuntimeStore` is split into durable state collaborators: `RuntimeConfigStore`, `LayerStackStore`, `ShaderPackageCatalog`, `RenderSnapshotBuilder`, presentation read models, and `HealthTelemetry`.
|
||||
- `RuntimeCoordinator` owns mutation validation/classification and publishes accepted/rejected/follow-up events.
|
||||
- `RuntimeSnapshotProvider` publishes render snapshots from `RenderSnapshotBuilder`.
|
||||
- `RuntimeLiveState` owns transient OSC overlay bookkeeping and commit-settlement policy.
|
||||
- `RenderStateComposer` exists as the first pure composition boundary for combining base layer state with live overlays.
|
||||
- `RenderFrameInput` / `RenderFrameState` now provide a named frame-facing handoff model for preparing layer state and render inputs before drawing.
|
||||
- `RenderFrameStateResolver` now owns snapshot cache selection, parameter refresh decisions, and final frame-state resolution before drawing.
|
||||
- `RenderEngine` owns GL/render resources and delegates frame-state preparation to the resolver.
|
||||
- `ControlServices` owns OSC ingress, pending OSC updates, completed OSC commit notifications, and service start/stop.
|
||||
- `RuntimeServiceLiveBridge` translates service OSC queues into render live-state updates and queues settled overlay commit requests.
|
||||
- `RuntimeEventDispatcher` now routes accepted mutations, reloads, snapshots, shader build events, backend observations, and health observations.
|
||||
|
||||
The current architecture is much better than the original `RuntimeHost` shape. Phase 4 has since moved normal runtime GL work onto the `RenderEngine` render thread, so the remaining render-facing risk is no longer shared context ownership; it is the later producer/consumer playout work needed to keep DeckLink callbacks from synchronously waiting on output production.
|
||||
|
||||
## Why Phase 3 Exists
|
||||
|
||||
The resilience review says render-thread isolation should come after state access and control coordination are no longer centered on a large mutable runtime object. Phase 2 gives us the event substrate; Phase 3 should make the data flowing into render explicit enough that Phase 4 can make the render thread the sole GL owner without dragging service coordination and state reconciliation with it.
|
||||
|
||||
The main problems Phase 3 addressed:
|
||||
|
||||
- transient OSC overlay state and persisted committed state needed a named reconciliation boundary
|
||||
- `RenderEngine` needed to move final frame-state selection and value composition out of drawing code
|
||||
- service-side queues for pending OSC updates and completed OSC commits needed a bridge outside `OpenGLComposite`
|
||||
- Phase 6 has since moved runtime-state persistence requests onto a debounced background writer
|
||||
- `RuntimeUpdateController` still exists partly as compatibility glue between synchronous coordinator results and event-driven effects
|
||||
|
||||
## Goals
|
||||
|
||||
Phase 3 should establish:
|
||||
|
||||
- an explicit live-state model separating persisted state, committed runtime state, and transient automation overlay
|
||||
- service-facing event bridges for OSC overlay updates and overlay commit completions
|
||||
- a narrower `OpenGLComposite::renderEffect()` that renders a prepared read model instead of orchestrating runtime/service state
|
||||
- a clear owner for final render-layer state resolution before it reaches GL drawing
|
||||
- a contained persistence request model that later Phase 6 connected to the background writer
|
||||
- tests for live-state composition, overlay settlement, and service-to-runtime event behavior without GL or DeckLink
|
||||
|
||||
## Non-Goals
|
||||
|
||||
Phase 3 should not require:
|
||||
|
||||
- a dedicated render thread
|
||||
- moving all GL calls off the current callback path
|
||||
- a background persistence writer implementation
|
||||
- a final DeckLink lifecycle state machine
|
||||
- replacing every direct synchronous command API
|
||||
- a final cue/preset/timeline system
|
||||
|
||||
Those are later phases. Phase 3 is about making state and service coordination clean enough for those later phases.
|
||||
|
||||
## Current Coordination Shape
|
||||
|
||||
`OpenGLComposite::renderEffect()` is now the app-level frame entrypoint, but it is intentionally narrow:
|
||||
|
||||
1. pumps `RuntimeUpdateController::ProcessRuntimeWork()`
|
||||
2. builds a `RenderFrameInput`
|
||||
3. renders the frame through `RuntimeServiceLiveBridge`, `RenderFrameStateResolver`, and `RenderEngine`
|
||||
|
||||
The bridge now owns service queue draining, live automation settlement, committed/live state selection, and OSC commit handoff. `RenderFrameStateResolver` owns snapshot cache selection, parameter refresh decisions, and dynamic render-field refresh before handing a prepared frame state to `RenderEngine`.
|
||||
|
||||
## Target State Model
|
||||
|
||||
Phase 3 should formalize three state categories:
|
||||
|
||||
| State category | Owner | Lifetime | Render role |
|
||||
| --- | --- | --- | --- |
|
||||
| Persisted layer state | `LayerStackStore` behind `RuntimeStore` | saved durable state | base layer stack and saved parameter values |
|
||||
| Committed runtime state | `RuntimeCoordinator` / snapshot publication | accepted operator/UI/OSC commits | stable render snapshot selected for rendering |
|
||||
| Transient automation overlay | new live-state collaborator or narrowed render-side owner | high-rate OSC automation between commits | temporary per-route override blended into final values |
|
||||
|
||||
Render should eventually consume:
|
||||
|
||||
```text
|
||||
final render state = published snapshot + committed live selection + transient overlay
|
||||
```
|
||||
|
||||
The important change is not the exact formula name. The important change is that final render-state composition has one named owner and can be tested without GL.
|
||||
|
||||
## Phase 3 Collaborators
|
||||
|
||||
### `RuntimeLiveState`
|
||||
|
||||
Small runtime collaborator for transient automation state.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- keep transient OSC overlay values keyed by route
|
||||
- track overlay generation and pending commit generation
|
||||
- apply overlay commit completions
|
||||
- decide when an overlay value has settled enough to request a commit
|
||||
- build a `LiveStateOverlaySnapshot` for final render-state composition
|
||||
|
||||
Non-responsibilities:
|
||||
|
||||
- persistent state mutation
|
||||
- shader package lookup
|
||||
- GL resources
|
||||
- OSC socket ownership
|
||||
|
||||
### `RenderStateComposer`
|
||||
|
||||
Pure or mostly pure collaborator for frame value composition.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- combine published render snapshots with live overlay state
|
||||
- apply smoothing/time-based automation policy
|
||||
- return final `RuntimeRenderState` values plus any commit requests
|
||||
- stay testable without OpenGL
|
||||
|
||||
Non-responsibilities:
|
||||
|
||||
- drawing
|
||||
- service queue draining
|
||||
- disk persistence
|
||||
- OSC packet parsing
|
||||
|
||||
### `RuntimeServiceLiveBridge`
|
||||
|
||||
`RuntimeServiceLiveBridge` is the current source-local bridge between services, live state, and render-state preparation.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- translate service-side OSC ingress into typed events or live-state commands
|
||||
- publish overlay applied/settled events where useful
|
||||
- route overlay commit requests to `RuntimeCoordinator`
|
||||
- keep `OpenGLComposite` out of service queue draining
|
||||
|
||||
Non-responsibilities:
|
||||
|
||||
- final GL rendering
|
||||
- persistent store mutation outside coordinator APIs
|
||||
|
||||
## Event Bridge Targets
|
||||
|
||||
| Current flow | Phase 3 bridge target | Notes |
|
||||
| --- | --- | --- |
|
||||
| pending OSC updates drained by `OpenGLComposite` | `OscValueReceived` -> live-state overlay update handler | Phase 2 already has the event type; Phase 3 decides whether transient overlay updates enter the app dispatcher or a source-local bridge. |
|
||||
| render asks for overlay commit requests | `OscOverlaySettled` or direct coordinator command plus event publication | Commit request creation should leave `renderEffect()` and live near the live-state owner. |
|
||||
| completed OSC commits drained by `OpenGLComposite` | `RuntimeMutationAccepted` / completion event -> live-state commit completion | Completed commit routing should be event-driven or owned by live-state service bridge. |
|
||||
| `RenderFrameStateResolver::Resolve(...)` | `RenderStateComposer::BuildFrameState(...)` | Keep final state composition testable without GL. |
|
||||
| direct persistence writes from store mutations | `RuntimePersistenceRequested` as the durable write trigger | Phase 6 later connected this request boundary to the background writer. |
|
||||
| runtime-state broadcast side effects | `RuntimeStateBroadcastRequested` plus optional completed/failed observations | Keep broadcast delivery in services and presentation ownership in runtime presentation. |
|
||||
|
||||
## Runtime Store Scope In Phase 3
|
||||
|
||||
`RuntimeStore` is already much smaller than the original host, but Phase 3 should keep narrowing it toward durable state and read-model publishing.
|
||||
|
||||
Target responsibilities:
|
||||
|
||||
- initialize runtime config and persistent state
|
||||
- expose durable layer/package/config read models
|
||||
- own saved layer stack and preset serialization while exposing snapshots for the background writer
|
||||
- publish or support immutable render/presentation snapshots
|
||||
|
||||
Avoid adding:
|
||||
|
||||
- transient OSC overlay state
|
||||
- frame-local render composition decisions
|
||||
- service queue coordination
|
||||
- background worker policy
|
||||
|
||||
## Runtime Coordinator Scope In Phase 3
|
||||
|
||||
`RuntimeCoordinator` should remain the command/mutation policy owner.
|
||||
|
||||
Keep:
|
||||
|
||||
- validation/classification
|
||||
- accepted/rejected mutation publication
|
||||
- reload/build/persistence follow-up events
|
||||
- synchronous command results for UI/API callers that need immediate success or error
|
||||
|
||||
Narrow:
|
||||
|
||||
- any behavior that looks like render-frame state composition
|
||||
- any direct service queue interpretation
|
||||
- any persistence timing policy beyond publishing `RuntimePersistenceRequested`
|
||||
|
||||
## Render Engine Scope In Phase 3
|
||||
|
||||
`RenderEngine` should move closer to being a GL/render-local owner.
|
||||
|
||||
Keep:
|
||||
|
||||
- GL resources
|
||||
- shader programs
|
||||
- render passes
|
||||
- preview/output rendering
|
||||
- temporal history and feedback resources
|
||||
|
||||
Move or narrow:
|
||||
|
||||
- transient OSC overlay bookkeeping
|
||||
- final layer-state composition from snapshot plus overlay
|
||||
- creation of commit requests from smoothed overlay values
|
||||
|
||||
Some transient render-only state may remain in `RenderEngine` if it truly belongs to GL or temporal resources. But value composition should be separable from drawing.
|
||||
|
||||
## OpenGLComposite Scope In Phase 3
|
||||
|
||||
`OpenGLComposite` should remain the current composition root, but not the runtime-service coordinator.
|
||||
|
||||
Target:
|
||||
|
||||
- wire collaborators
|
||||
- own app-level lifecycle
|
||||
- initialize GL/backend/runtime services
|
||||
- call narrow render/update entrypoints
|
||||
|
||||
Avoid:
|
||||
|
||||
- draining OSC queues directly
|
||||
- converting service DTOs into render DTOs
|
||||
- deciding final layer-state composition
|
||||
- coordinating commit completion settlement
|
||||
|
||||
## Persistence Position
|
||||
|
||||
Phase 3 did not implement the background writer, but it prepared the request boundary that Phase 6 now uses.
|
||||
|
||||
Target behavior by Phase 3 exit:
|
||||
|
||||
- state mutations publish `RuntimePersistenceRequested`
|
||||
- persistence can be observed and tested as an event side effect
|
||||
- disk writes are not inferred by callers; later Phase 6 routes accepted durable mutations through `RuntimePersistenceRequested` and the background writer
|
||||
- callers outside the store/coordinator should not infer disk writes from mutation categories
|
||||
|
||||
This kept Phase 6 smaller: the background snapshot writer consumes persistence requests and stored-state snapshots rather than rediscovering mutation policy.
|
||||
|
||||
## Migration Plan
|
||||
|
||||
### Step 1. Name The Live State Boundary
|
||||
|
||||
Introduce `RuntimeLiveState`, `RenderStateComposer`, or an equivalent pair of classes.
|
||||
|
||||
Start by moving pure data operations out of frame rendering without changing behavior.
|
||||
|
||||
Status: complete for Phase 3. `runtime/live/RuntimeLiveState` and `runtime/live/RenderStateComposer` exist, are included in the build, and have a focused `RuntimeLiveStateTests` target.
|
||||
|
||||
### Step 2. Move OSC Overlay Bookkeeping Behind The Boundary
|
||||
|
||||
Move these responsibilities out of the current frame orchestration:
|
||||
|
||||
- overlay updates by route
|
||||
- commit completion tracking
|
||||
- generation matching
|
||||
- settle/commit request creation
|
||||
|
||||
The first implementation can still be called synchronously from the current render path. The important part is that the behavior has a named owner and tests.
|
||||
|
||||
Status: complete for Phase 3. `RenderEngine` still exposes compatibility methods used by the service bridge, but it delegates overlay updates, commit completions, smoothing, generation matching, and commit-request creation to `RuntimeLiveState`/`RenderStateComposer`.
|
||||
|
||||
### Step 3. Bridge Service Queues To Events Or Live-State Commands
|
||||
|
||||
Replace `OpenGLComposite::renderEffect()` queue draining with a bridge that publishes or applies:
|
||||
|
||||
- `OscValueReceived`
|
||||
- `OscOverlayApplied`
|
||||
- `OscOverlaySettled`
|
||||
- overlay commit completion observations
|
||||
|
||||
This is where the remaining Phase 2 open question about transient OSC overlay event scope should be resolved for the current architecture.
|
||||
|
||||
Status: complete for Phase 3. `RuntimeServiceLiveBridge` now drains pending OSC updates and completed OSC commits, applies them to render live state, and queues settled commit requests. It remains a source-local bridge by design until later live-state layering decides whether transient automation should enter the app-level dispatcher.
|
||||
|
||||
### Step 4. Narrow `OpenGLComposite::renderEffect()`
|
||||
|
||||
Target shape:
|
||||
|
||||
```cpp
|
||||
void OpenGLComposite::renderEffect()
|
||||
{
|
||||
mRuntimeUpdateController->ProcessRuntimeWork();
|
||||
const RenderFrameInput frameInput = BuildRenderFrameInput();
|
||||
RenderFrame(frameInput);
|
||||
}
|
||||
```
|
||||
|
||||
The exact names can change. The goal is that `renderEffect()` no longer manually drains services, settles overlay commits, or resolves layer values.
|
||||
|
||||
Status: complete for Phase 3. `OpenGLComposite::renderEffect()` now processes runtime work, builds `RenderFrameInput`, and calls a narrow frame-render helper. Service draining, state resolution, and commit handoff sit behind `RuntimeServiceLiveBridge::PrepareLiveRenderFrameState(...)`, `RenderFrameStateResolver`, and `RenderFrameState`.
|
||||
|
||||
### Step 5. Add Persistence Boundary Tests
|
||||
|
||||
Add behavior tests for:
|
||||
|
||||
- accepted persisted mutations publish `RuntimePersistenceRequested`
|
||||
- transient OSC commits do not force immediate persistence
|
||||
- preset load/save persistence requests remain explicit
|
||||
- rejected mutations do not publish persistence work
|
||||
|
||||
Status: complete for Phase 3. `RuntimeSubsystemTests` and `RuntimeEventTypeTests` cover accepted mutation persistence requests, rejected mutations, and transient OSC overlay behavior that does not request persistence.
|
||||
|
||||
### Step 6. Update Docs And Phase 4 Readiness
|
||||
|
||||
Before calling Phase 3 complete, update:
|
||||
|
||||
- subsystem docs for new live-state/composer collaborators
|
||||
- architecture review checklist
|
||||
- Phase 4 assumptions about render thread input state
|
||||
|
||||
Status: complete. The Phase 4 design note started from the `RenderFrameInput` / `RenderFrameState` contract and has now completed the shared-GL ownership migration.
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
Phase 3 tests should avoid GL, DeckLink, and sockets.
|
||||
|
||||
Recommended tests:
|
||||
|
||||
- final layer-state composition applies snapshot values when no overlay exists
|
||||
- transient overlay overrides the matching parameter by route
|
||||
- smoothing moves toward target values over time
|
||||
- overlay settle creates one commit request per route/generation
|
||||
- completed commits clear pending overlay commit state
|
||||
- stale commit completions are ignored by generation
|
||||
- accepted mutations publish persistence requests where expected
|
||||
- rejected mutations do not publish persistence or render follow-ups
|
||||
- `OpenGLComposite` no longer needs to drain service result queues for runtime effects
|
||||
|
||||
Existing useful homes:
|
||||
|
||||
- `RuntimeSubsystemTests` for pure state/composer behavior
|
||||
- `RuntimeEventTypeTests` for event bridge behavior
|
||||
- `RuntimeLiveStateTests` for the new live-state/composer boundary
|
||||
|
||||
## Parallel Work Lanes
|
||||
|
||||
The current groundwork is intended to let these lanes proceed in parallel with low overlap:
|
||||
|
||||
| Lane | Primary files | Goal |
|
||||
| --- | --- | --- |
|
||||
| A. Live-state behavior | `runtime/live/RuntimeLiveState.*`, `tests/RuntimeLiveStateTests.cpp` | Implemented for Phase 3: stale completion, smoothing, trigger behavior, and overlay settle policy are covered by focused tests. |
|
||||
| B. Render-state composition | `runtime/live/RenderStateComposer.*`, `gl/frame/RenderFrameStateResolver.*`, `gl/RenderEngine.*` | Implemented for Phase 3: value composition and frame-state selection sit outside GL drawing while GL calls remain in `RenderEngine`. |
|
||||
| C. Service bridge | `control/RuntimeServices.*`, `control/RuntimeServiceLiveBridge.*`, `control/ControlServices.*` | Implemented for Phase 3: `OpenGLComposite::renderEffect()` no longer drains OSC update/completion queues directly. |
|
||||
| D. App-frame orchestration | `gl/composite/OpenGLComposite.*`, `gl/frame/RuntimeUpdateController.*` | Implemented for Phase 3: render-effect glue is a narrow runtime-work, frame-input, render-frame sequence. |
|
||||
| E. Persistence boundary | `runtime/coordination/RuntimeCoordinator.*`, `runtime/store/*`, event tests | Implemented for Phase 3: persistence request publication is explicit. Phase 6 later wired those requests to the background writer. |
|
||||
|
||||
## Phase 3 Exit Criteria
|
||||
|
||||
Phase 3 can be considered complete once the project can say:
|
||||
|
||||
- [x] final render-state composition has named owners outside `OpenGLComposite` (`RenderStateComposer` covers live value composition; `RenderFrameStateResolver` covers snapshot/cache selection and frame-state resolution)
|
||||
- [x] transient OSC overlay state has a named owner and tests
|
||||
- [x] overlay commit requests and completions no longer require `OpenGLComposite` to drain service queues directly
|
||||
- [x] `RenderEngine` is closer to GL/render resource ownership and less responsible for value composition
|
||||
- [x] `RuntimeStore` remains durable-state focused and does not gain live overlay responsibilities
|
||||
- [x] persistence requests are explicit event outcomes for persisted mutations
|
||||
- [x] Phase 4 can define a render-thread input contract around immutable or near-immutable frame state
|
||||
|
||||
## Open Questions
|
||||
|
||||
- Should transient OSC overlay values enter the app-level event dispatcher, or should they use a dedicated source-local latest-value bridge until live-state layering is finalized?
|
||||
- Should the new live-state owner live under `runtime/`, `gl/`, or a new `renderstate/` boundary?
|
||||
- Should smoothing policy be owned by live state, render-state composition, or render settings?
|
||||
- Should overlay commit completion be represented as a new typed event, or derived from existing accepted mutation events with route/generation metadata?
|
||||
- Should preset save remain synchronous after Phase 6, or eventually move behind a completion-based async request?
|
||||
|
||||
## Short Version
|
||||
|
||||
Phase 3 should make the app's live state boring and explicit.
|
||||
|
||||
- persisted state stays in the store
|
||||
- accepted command policy stays in the coordinator
|
||||
- transient automation gets a named owner
|
||||
- final render-state composition becomes testable without GL
|
||||
- `OpenGLComposite` stops manually reconciling service queues and layer values
|
||||
|
||||
Once that is true, Phase 4 can make the render thread the sole GL owner without also having to invent a clean state model at the same time.
|
||||
@@ -1,408 +0,0 @@
|
||||
# Phase 4 Design: Render Thread Ownership
|
||||
|
||||
This document expands Phase 4 of [ARCHITECTURE_RESILIENCE_REVIEW.md](./ARCHITECTURE_RESILIENCE_REVIEW.md) into a concrete design target.
|
||||
|
||||
Phase 1 named the subsystems. Phase 2 added the typed event substrate. Phase 3 made render-facing live state explicit through `RuntimeLiveState`, `RenderStateComposer`, `RenderFrameInput`, `RenderFrameState`, `RenderFrameStateResolver`, and `RuntimeServiceLiveBridge`. Phase 4 can now focus on the core timing-risk boundary: making one render thread the only owner of OpenGL work.
|
||||
|
||||
## Status
|
||||
|
||||
- Phase 4 design package: implemented.
|
||||
- Phase 4 implementation: complete for GL ownership. `RenderEngine` starts a dedicated render thread, owns the GL context during normal runtime work, and exposes queue/request entrypoints for input upload, output render, preview presentation, screenshot capture, shader rebuild application, and render-local resets.
|
||||
- Current alignment: normal runtime GL work is routed through the render thread after startup. Startup initialization still runs before the render thread starts while the app explicitly owns the context, and shutdown now stops DeckLink/backend work before destroying render-thread GL resources and deleting the context.
|
||||
|
||||
Current GL ownership footholds:
|
||||
|
||||
- `RenderEngine` owns GL resources, a dedicated render thread, synchronous request/response for output frames, a small render command mailbox, named render-thread helper methods, and wrong-thread diagnostics for those helpers.
|
||||
- `RenderFrameInput` / `RenderFrameState` provide the frame-state contract that a render thread can consume.
|
||||
- `RenderFrameStateResolver` prepares the render-facing layer state before drawing.
|
||||
- `OpenGLVideoIOBridge` calls `RenderEngine::QueueInputFrame(...)` from the input path and `RenderEngine::RequestOutputFrame(...)` from the output path.
|
||||
- `OpenGLComposite::paintGL(...)`, screenshot capture, input upload, and output rendering enter render work through explicit `RenderEngine` requests. After `OpenGLComposite::Start()` starts the render thread, those requests do not bind the GL context on the caller thread.
|
||||
|
||||
## Why Phase 4 Exists
|
||||
|
||||
The resilience review identifies shared GL ownership as the main remaining timing and failure-isolation risk. Today the shared context lock protects correctness, but it does not isolate timing:
|
||||
|
||||
- input callbacks can attempt texture upload
|
||||
- output callbacks can trigger frame rendering and readback
|
||||
- preview paint can enter the same GL context
|
||||
- screenshot capture can enter the same GL context
|
||||
- the DeckLink completion path is still too close to render work
|
||||
|
||||
That means brief input, preview, readback, or callback stalls can still collide on the most timing-sensitive path.
|
||||
|
||||
Phase 4 should turn GL from a shared resource guarded by a lock into a resource owned by one thread with explicit queues and handoff points.
|
||||
|
||||
## Goals
|
||||
|
||||
Phase 4 should establish:
|
||||
|
||||
- one render thread as the sole long-lived owner of the GL context
|
||||
- non-render threads enqueue work instead of binding the GL context
|
||||
- input upload requests are accepted and executed by the render thread
|
||||
- output frame rendering is requested or scheduled through render-owned work
|
||||
- preview and screenshot requests become render-thread commands or consumers
|
||||
- `RenderFrameInput` / `RenderFrameState` become the stable data contract for frame production
|
||||
- GL context entrypoints are reduced to render-thread-only code paths
|
||||
- tests for queue semantics and request coalescing without requiring DeckLink hardware, plus explicit lifecycle ordering in code
|
||||
|
||||
## Non-Goals
|
||||
|
||||
Phase 4 should not require:
|
||||
|
||||
- the final producer/consumer playout queue for DeckLink
|
||||
- the final DeckLink lifecycle state machine
|
||||
- replacing the async readback policy
|
||||
- implementing background persistence
|
||||
- completing Phase 5's deeper live-state layering
|
||||
- replacing every UI or backend API at once
|
||||
|
||||
Those are later phases or follow-on work. Phase 4 is about making GL ownership deterministic first.
|
||||
|
||||
## Current GL Entry Points
|
||||
|
||||
The current code paths that matter most are:
|
||||
|
||||
| Entry point | Current behavior | Phase 4 direction |
|
||||
| --- | --- | --- |
|
||||
| `RenderEngine::QueueInputFrame(...)` | copies the latest input frame into the render mailbox and returns without waiting for GL | render thread uploads latest input without callback-owned GL |
|
||||
| `RenderEngine::RequestOutputFrame(...)` | synchronous output request; after render-thread startup it queues output render work and waits for render-thread completion with timeout/failure reporting | render thread executes output frame production |
|
||||
| `RenderEngine::TryPresentPreview(...)` | best-effort request; callers queue preview presentation and return | render thread consumes latest completed frame for preview |
|
||||
| `RenderEngine::RequestScreenshotCapture(...)` | queues screenshot capture and async disk write completion | screenshot capture is a render-thread command |
|
||||
| `OpenGLVideoIOBridge::UploadInputFrame(...)` | copies the latest input frame into the render mailbox and returns without waiting for GL | render thread uploads the latest queued input frame |
|
||||
| `OpenGLVideoIOBridge::RenderScheduledFrame(...)` | requests render-thread output production and reports success/failure to the backend | consume render-produced output without callback-owned GL |
|
||||
|
||||
## Target Ownership Model
|
||||
|
||||
### Render Thread
|
||||
|
||||
The render thread should own:
|
||||
|
||||
- `wglMakeCurrent(...)` for the rendering context
|
||||
- all GL resource creation/destruction
|
||||
- input texture upload
|
||||
- pass execution
|
||||
- output pack conversion
|
||||
- async readback buffers and fences
|
||||
- preview presentation or preview frame publication
|
||||
- screenshot readback
|
||||
- temporal history and feedback resources
|
||||
|
||||
### Other Threads
|
||||
|
||||
Other threads may:
|
||||
|
||||
- enqueue input frames or replace the latest input frame
|
||||
- publish control/runtime/backend events
|
||||
- request shader build application
|
||||
- request render-local resets
|
||||
- request screenshots
|
||||
- consume ready output frames or receive completion notifications
|
||||
|
||||
Other threads should not:
|
||||
|
||||
- call GL directly
|
||||
- bind or unbind the render context
|
||||
- wait on GL fences directly
|
||||
- mutate render-local resource state
|
||||
|
||||
## Proposed Collaborators
|
||||
|
||||
### `RenderThread`
|
||||
|
||||
Owns the OS thread, wakeup primitive, lifecycle, and render-loop execution.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- start and stop the render thread
|
||||
- bind the GL context for the thread lifetime or render-loop lifetime
|
||||
- drain render commands
|
||||
- execute frame production work
|
||||
- publish lifecycle and failure observations
|
||||
|
||||
Non-responsibilities:
|
||||
|
||||
- runtime mutation policy
|
||||
- DeckLink scheduling policy
|
||||
- durable persistence
|
||||
|
||||
### `RenderCommandQueue`
|
||||
|
||||
Small bounded queue or command mailbox for render-thread work.
|
||||
|
||||
Current implementation:
|
||||
|
||||
- `RenderCommandQueue` exists as a pure C++ mailbox helper.
|
||||
- Preview present and screenshot capture requests use latest-value coalescing.
|
||||
- Input upload requests use latest-value coalescing with owned frame bytes copied at enqueue time.
|
||||
- Output frame requests use FIFO semantics so scheduled output demand is not collapsed.
|
||||
- Render-local reset requests coalesce to the strongest pending reset scope.
|
||||
- Output frame requests use synchronous request/response through the render thread as the remaining transitional playout bridge.
|
||||
|
||||
Possible commands:
|
||||
|
||||
- `UploadInputFrame`
|
||||
- `RenderOutputFrame`
|
||||
- `PrepareFrameState`
|
||||
- `ApplyShaderBuild`
|
||||
- `ResetTemporalHistory`
|
||||
- `ResetShaderFeedback`
|
||||
- `PresentPreview`
|
||||
- `CaptureScreenshot`
|
||||
- `Stop`
|
||||
|
||||
High-rate commands should be coalesced where appropriate. Input frames should likely be latest-value rather than unbounded FIFO.
|
||||
|
||||
### `RenderFrameCoordinator`
|
||||
|
||||
Optional helper that combines Phase 3's frame contract with render-thread execution.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- build or receive `RenderFrameInput`
|
||||
- call `RuntimeServiceLiveBridge` and `RenderFrameStateResolver`
|
||||
- hand `RenderFrameState` to `RenderEngine`
|
||||
|
||||
This can begin as a thin helper. The important part is that it keeps frame-state preparation explicit when `renderEffect()` stops being called directly from the callback path.
|
||||
|
||||
### `RenderOutputMailbox`
|
||||
|
||||
Optional transitional bridge for output frames.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- hold the latest completed output frame or a small bounded set
|
||||
- let backend code consume output without owning GL
|
||||
- report underrun/stale-frame reuse observations
|
||||
|
||||
This may be a Phase 4 late step or a Phase 7 playout-policy step. Phase 4 should at least avoid designing the render thread in a way that blocks it.
|
||||
|
||||
## Threading Contract
|
||||
|
||||
Phase 4 should make thread ownership visible in APIs.
|
||||
|
||||
Candidate naming:
|
||||
|
||||
- `RenderEngine::StartRenderThread(...)`
|
||||
- `RenderEngine::StopRenderThread()`
|
||||
- `RenderEngine::EnqueueInputFrame(...)`
|
||||
- `RenderEngine::RequestOutputFrame(...)`
|
||||
- `RenderEngine::RequestPreviewPresent(...)`
|
||||
- `RenderEngine::RequestScreenshot(...)`
|
||||
|
||||
Render-thread-only methods should be private or clearly named:
|
||||
|
||||
- `RenderEngine::UploadInputFrameOnRenderThread(...)`
|
||||
- `RenderEngine::RenderOutputFrameOnRenderThread(...)`
|
||||
- `RenderEngine::CaptureOutputFrameRgbaTopDownOnRenderThread(...)`
|
||||
|
||||
The public runtime entrypoints now use queue/request language. `RequestOutputFrame(...)` remains synchronous so the existing DeckLink callback path can keep producing an output frame while Phase 7's producer/consumer playout queue is still future work.
|
||||
|
||||
## Frame Production Shape
|
||||
|
||||
A target render-thread frame should look like:
|
||||
|
||||
1. wake for input, output demand, preview demand, shader build, reset, screenshot, or stop
|
||||
2. drain bounded render commands
|
||||
3. coalesce to the latest input frame and latest control/live state
|
||||
4. build `RenderFrameInput`
|
||||
5. prepare `RenderFrameState`
|
||||
6. upload accepted input frame
|
||||
7. render layer stack
|
||||
8. pack output if needed
|
||||
9. stage readback or output buffer
|
||||
10. publish preview/screenshot/output completion as needed
|
||||
11. record timing and queue metrics
|
||||
|
||||
The exact cadence can remain demand-driven initially. The architectural win is that the demand wakes the render thread rather than borrowing GL from the caller.
|
||||
|
||||
## Migration Plan
|
||||
|
||||
### Step 1. Name Render-Thread-Only Methods
|
||||
|
||||
Split existing direct GL methods into public request methods and private render-thread methods without changing behavior much.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] keep current synchronous behavior where callers need a result
|
||||
- [x] move GL bodies into clearly render-thread-owned helpers for upload, output render, preview presentation, and screenshot readback
|
||||
- [x] make future queue migration mechanical
|
||||
|
||||
### Step 2. Add Render Command Queue
|
||||
|
||||
Introduce a small queue/mailbox for render commands.
|
||||
|
||||
Start with low-risk commands:
|
||||
|
||||
- [x] preview present request
|
||||
- [x] screenshot request
|
||||
- [x] render-local reset requests
|
||||
- [x] input upload request
|
||||
- [x] output render request
|
||||
|
||||
The queue and wakeup behavior still need the dedicated render thread before the callbacks stop borrowing the GL context.
|
||||
|
||||
### Step 3. Start A Dedicated Render Thread
|
||||
|
||||
Create the render thread and make it own context binding.
|
||||
|
||||
- [x] create a dedicated render thread owned by `RenderEngine`
|
||||
- [x] bind the existing GL context on the render thread for normal runtime work
|
||||
- [x] stop the render thread before GL context destruction
|
||||
- [x] keep transitional synchronous request/response for output frames
|
||||
- [x] remove normal runtime dependence on the shared GL `CRITICAL_SECTION`
|
||||
- [x] add timeout/failure behavior for render-thread requests
|
||||
|
||||
Transitional behavior still allows synchronous request/response for output frames. Render-thread requests now fail fast if they cannot begin within the request timeout, and log over-budget tasks that have already started before waiting for safe completion. The important change is that the caller waits for render-thread completion rather than taking the GL context itself.
|
||||
|
||||
### Step 4. Move Input Upload To The Render Thread
|
||||
|
||||
Change `OpenGLVideoIOBridge::UploadInputFrame(...)` so it enqueues or replaces the latest input frame.
|
||||
|
||||
Policy targets:
|
||||
|
||||
- [x] bounded memory
|
||||
- [x] latest-frame wins under load
|
||||
- [x] input upload skip count is observable through render command coalescing metrics
|
||||
- [x] input callback never waits for GL
|
||||
|
||||
Current implementation: `OpenGLVideoIOBridge::UploadInputFrame(...)` calls `RenderEngine::QueueInputFrame(...)`, which copies the input bytes into the latest-value render mailbox and schedules one bounded render-thread wakeup to upload the newest pending frame.
|
||||
|
||||
### Step 5. Move Output Rendering To The Render Thread
|
||||
|
||||
Change `OpenGLVideoIOBridge::RenderScheduledFrame(...)` so it requests render-thread output production or consumes a completed render-thread output.
|
||||
|
||||
Transitional option:
|
||||
|
||||
- [x] synchronous request/response through the render thread
|
||||
|
||||
Better follow-up:
|
||||
|
||||
- render ahead into a bounded output queue and let backend callbacks consume ready frames
|
||||
|
||||
Current implementation: `OpenGLVideoIOBridge::RenderScheduledFrame(...)` calls `RenderEngine::RequestOutputFrame(...)` and returns whether the render-thread request produced an output frame. `VideoBackend` skips scheduling that frame when render production fails or times out.
|
||||
|
||||
### Step 6. Decouple Preview And Screenshot Requests
|
||||
|
||||
Preview should become best-effort:
|
||||
|
||||
- [x] request preview presentation from the render thread
|
||||
- [x] skip/coalesce when render is busy or output deadline pressure is high
|
||||
- [x] record preview skips through render command coalescing metrics
|
||||
|
||||
Screenshot should become:
|
||||
|
||||
- [x] queued render-thread capture request
|
||||
- [x] async disk write remains outside render thread
|
||||
|
||||
Current implementation: `OpenGLComposite::RequestScreenshot(...)` builds the output path, queues `RenderEngine::RequestScreenshotCapture(...)`, and the render thread captures pixels before handing them to the existing async PNG writer. Preview presentation is a latest-value best-effort render command that is queued behind output render work, even when requested from the render pipeline.
|
||||
|
||||
### Step 7. Remove Shared GL Lock From Normal Paths
|
||||
|
||||
Once all GL entrypoints are render-thread-owned:
|
||||
|
||||
- [x] remove normal dependence on `pMutex` for render correctness
|
||||
- [x] keep diagnostics that detect wrong-thread render-thread helper calls
|
||||
- [x] leave only lifecycle context binding where needed
|
||||
|
||||
Current implementation: `OpenGLComposite` no longer owns or passes a shared `CRITICAL_SECTION`, and `RenderEngine` no longer has caller-thread GL fallback paths for preview, input upload, output render, or screenshot capture. Runtime callers must go through the render thread; pre-start direct GL fallback is limited to startup initialization while the app explicitly owns the context.
|
||||
|
||||
### Shutdown Order
|
||||
|
||||
Current shutdown order is explicit in code:
|
||||
|
||||
1. `OpenGLComposite::Stop()` stops runtime services so control/OSC work stops entering the runtime.
|
||||
2. `VideoBackend::Stop()` stops DeckLink streams/playout so input and output callbacks stop requesting render work.
|
||||
3. `RenderEngine::StopRenderThread()` destroys GL resources on the render thread, signals the render thread to stop, joins it, and unbinds the context on render-thread exit.
|
||||
4. `WM_DESTROY` deletes `OpenGLComposite`, unbinds the window context, and deletes the GL context.
|
||||
|
||||
This order is build-tested, and `RenderCommandQueue` behavior is covered by non-GL unit tests. It still benefits from a real-window/DeckLink shutdown smoke test, but the code path is explicit enough for Phase 4's design exit.
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
Phase 4 tests should avoid hardware where possible.
|
||||
|
||||
Recommended tests:
|
||||
|
||||
- render command queue preserves FIFO for non-coalesced commands
|
||||
- latest-input mailbox drops older frames under load
|
||||
- shutdown path stops backend callbacks before stopping and joining the render thread
|
||||
- screenshot request receives one completion or failure
|
||||
- output render request reports failure if render thread is stopped
|
||||
- render reset commands coalesce where expected
|
||||
- wrong-thread render-only diagnostics are present on private render-thread helpers
|
||||
|
||||
Existing useful homes:
|
||||
|
||||
- `RuntimeEventTypeTests` for new render/backend observations
|
||||
- `RuntimeSubsystemTests` for pure request/coalescing helpers
|
||||
- a future `RenderThreadTests` target if render-thread lifecycle is extracted behind a non-GL test seam
|
||||
|
||||
Manual verification will still be needed for:
|
||||
|
||||
- real DeckLink input/output
|
||||
- preview interaction
|
||||
- screenshot capture
|
||||
- shader reload while rendering
|
||||
- real window/context shutdown
|
||||
|
||||
## Telemetry Added During Phase 4
|
||||
|
||||
Phase 4 should add minimal metrics while moving ownership:
|
||||
|
||||
- render command queue depth
|
||||
- input frames accepted, replaced, and dropped
|
||||
- render-thread wake reason counts
|
||||
- render-thread frame duration
|
||||
- output request latency
|
||||
- preview request skipped count
|
||||
- screenshot request success/failure count
|
||||
- wrong-thread GL call diagnostics if practical
|
||||
|
||||
Full operational reporting remains Phase 8, but these metrics make the threading migration debuggable.
|
||||
|
||||
## Risks
|
||||
|
||||
### Deadlock Risk
|
||||
|
||||
Synchronous request/response shims can deadlock if the caller is already on the render thread or holds a lock the render thread needs. Phase 4 should keep request waits narrow and add render-thread detection early.
|
||||
|
||||
### Latency Risk
|
||||
|
||||
Moving work through queues can hide latency. Queue depth and output request latency should be measured from the first migration step.
|
||||
|
||||
### Lifetime Risk
|
||||
|
||||
Moving context ownership changes startup and shutdown order. The render thread must stop before GL resources or window/context handles are destroyed.
|
||||
|
||||
### Callback Pressure Risk
|
||||
|
||||
If DeckLink callbacks wait too long for render-thread work, Phase 4 may improve GL ownership but still leave callback timing fragile. A synchronous bridge is acceptable as a transition, but the design should keep the path open for producer/consumer playout.
|
||||
|
||||
### Preview Coupling Risk
|
||||
|
||||
Preview can remain a hidden budget consumer if it stays in the output frame path. Phase 4 should keep preview explicitly best-effort, even if physical decoupling continues later.
|
||||
|
||||
## Phase 4 Exit Criteria
|
||||
|
||||
Phase 4 can be considered complete once the project can say:
|
||||
|
||||
- [x] one render thread owns the GL context during normal operation
|
||||
- [x] input callbacks do not bind GL or wait on GL upload
|
||||
- [x] output callbacks do not bind GL directly
|
||||
- [x] preview and screenshot requests enter render through explicit render-thread requests
|
||||
- [x] `RenderFrameInput` / `RenderFrameState` remain the frame-state contract
|
||||
- [x] normal frame production no longer depends on a shared GL `CRITICAL_SECTION`
|
||||
- [x] render-thread queue/mailbox behavior has non-GL tests
|
||||
- [x] shutdown order is explicit and tested or manually verified
|
||||
|
||||
## Open Questions
|
||||
|
||||
- What exact producer/consumer output queue shape should replace the current synchronous output request in Phase 7?
|
||||
- Should preview present on the render thread, or should render publish a preview image/texture to a separate presenter?
|
||||
- Should wrong-thread GL access eventually escalate from debug diagnostics to structured telemetry or assertions?
|
||||
|
||||
## Short Version
|
||||
|
||||
Phase 4 should make GL ownership boring and deterministic.
|
||||
|
||||
One render thread owns the context. Other threads submit work or consume results. Input upload, frame rendering, readback, preview, and screenshot capture all move behind render-thread entrypoints. Output production remains a synchronous request/response bridge for now, but the app no longer relies on callback and UI paths borrowing the GL context under one shared lock.
|
||||
@@ -1,416 +0,0 @@
|
||||
# Phase 5 Design: Live State Layering And Composition
|
||||
|
||||
This document expands Phase 5 of [ARCHITECTURE_RESILIENCE_REVIEW.md](ARCHITECTURE_RESILIENCE_REVIEW.md) into a concrete design target.
|
||||
|
||||
Phase 1 named the subsystems. Phase 2 added the typed event substrate. Phase 3 made render-facing live state explicit through `RuntimeLiveState`, `RenderStateComposer`, `RenderFrameInput`, `RenderFrameState`, `RenderFrameStateResolver`, and `RuntimeServiceLiveBridge`. Phase 4 made one render thread the owner of normal runtime GL work. Phase 5 should now make the live parameter model itself explicit: persisted truth, operator/session truth, and transient automation should be separate layers with one predictable composition rule.
|
||||
|
||||
## Status
|
||||
|
||||
- Phase 5 design package: complete.
|
||||
- Phase 5 implementation: complete.
|
||||
- Current alignment: Phase 3 introduced the first pure composition boundary and transient OSC overlay owner. Phase 5 now has a small `RuntimeStateLayerModel` inventory that names the current state categories, `RenderStateComposer` consumes a `LayeredRenderStateInput` whose fields make base persisted, committed live, and transient automation inputs explicit, `RuntimeLiveState` owns transient-overlay invalidation against current layer/parameter compatibility, settled OSC commits have an explicit session-only persistence policy, and `CommittedLiveState` physically owns current session layer state. `RuntimeStore` still owns file IO, config, package metadata, preset persistence, and persistence requests.
|
||||
|
||||
Current live-state footholds:
|
||||
|
||||
- `RuntimeStore` owns file IO, config, package metadata, preset persistence, persistent-state serialization, and persistence requests.
|
||||
- `CommittedLiveState` physically owns the current committed/session layer stack and parameter values.
|
||||
- `RuntimeCoordinator` owns mutation validation, classification, accepted/rejected event publication, snapshot/reload follow-ups, and the policy switch between committed states and live snapshots.
|
||||
- `RuntimeSnapshotProvider` publishes render-facing snapshots from committed runtime state.
|
||||
- `RuntimeLiveState` owns transient OSC overlay bookkeeping, smoothing, generation tracking, and commit-settlement policy.
|
||||
- `RenderStateComposer` consumes `LayeredRenderStateInput`, chooses committed-live layer states over base-persisted layer states when both are supplied, applies transient automation on top, and returns final per-frame layer states plus settled commit requests.
|
||||
- `RuntimeServiceLiveBridge` drains OSC ingress/completion queues and applies them to render live state during frame preparation.
|
||||
- `RuntimeStateLayerModel` names the Phase 5 state categories and classifies current fields as base persisted, committed live, transient automation, render-local, or health/config state.
|
||||
- `RuntimeCoordinator` can request layer-scoped transient OSC invalidation, while `RuntimeLiveState` prunes overlays that no longer map to the current render-facing layer/parameter definitions.
|
||||
- `RuntimeCoordinator::CommitOscParameterByControlKey(...)` commits settled OSC values into session state without requesting persistence by default.
|
||||
- `CommittedLiveState` owns current committed/session layer state and exposes `CommittedLiveStateReadModel` for render snapshot publication.
|
||||
|
||||
## Why Phase 5 Exists
|
||||
|
||||
The resilience review identifies live OSC overlay and persisted state as separate concepts that still do not have a fully formal model. The app now has better boundaries, but several policies are still implicit:
|
||||
|
||||
- whether a value is durable, committed for the current session, or transient automation
|
||||
- whether an OSC value should merely influence the current frame or eventually commit
|
||||
- what reload, preset load, layer removal, shader change, and reset should do to transient values
|
||||
- which layer wins when UI/operator changes race with OSC automation
|
||||
- which state changes should publish snapshots, request persistence, or only affect render frames
|
||||
|
||||
Without a formal layering model, these rules can leak across `RuntimeStore`, `RuntimeCoordinator`, `RuntimeLiveState`, `RenderStateComposer`, and service bridges. Phase 5 should make those rules boring and testable.
|
||||
|
||||
## Goals
|
||||
|
||||
Phase 5 should establish:
|
||||
|
||||
- explicit state layers for persisted, committed/session, and transient automation values
|
||||
- one named composition contract for final render values
|
||||
- clear ownership for layer-specific mutation policy
|
||||
- explicit reset/reload/preset behavior for transient and committed state
|
||||
- a clean path for OSC automation to remain high-rate without becoming durable state by accident
|
||||
- tests for layer precedence, lifecycle, invalidation, and commit policy without GL or DeckLink
|
||||
- documentation that distinguishes render-local temporal/feedback state from parameter/live-state overlays
|
||||
|
||||
## Non-Goals
|
||||
|
||||
Phase 5 should not require:
|
||||
|
||||
- a background persistence writer implementation
|
||||
- a DeckLink producer/consumer playout queue
|
||||
- a full cue/timeline/preset performance system
|
||||
- a new UI state-management framework
|
||||
- replacing every synchronous coordinator API
|
||||
- moving temporal history or shader feedback into the runtime state model
|
||||
|
||||
Those are later phases or separate feature work. Phase 5 is about parameter and live-value layering.
|
||||
|
||||
## Target State Model
|
||||
|
||||
Phase 5 should formalize three layers:
|
||||
|
||||
| Layer | Owner | Lifetime | Persistence | Render role |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| Base persisted state | `RuntimeStore` plus durable serialization/preset IO | survives restart | written to disk | default saved layer stack, shader selections, saved parameter values |
|
||||
| Committed live state | `CommittedLiveState` with policy owned by `RuntimeCoordinator` | current running session | may request persistence depending on mutation type | operator/UI/current truth until changed again |
|
||||
| Transient automation overlay | `RuntimeLiveState` or a new automation overlay collaborator | high-rate/short-lived | not persisted directly | temporary OSC/automation target applied over committed truth |
|
||||
|
||||
The target composition rule is:
|
||||
|
||||
```text
|
||||
final render state = base persisted state + committed live state + transient automation overlay
|
||||
```
|
||||
|
||||
The actual implementation may continue using render snapshots as the base transport. The important part is that each layer has named ownership, documented lifetime, and tested precedence.
|
||||
|
||||
## Current Composition Shape
|
||||
|
||||
Today, final frame state is prepared through this path:
|
||||
|
||||
1. `OpenGLComposite::renderEffect()` processes runtime work.
|
||||
2. `OpenGLComposite` builds `RenderFrameInput`.
|
||||
3. `RuntimeServiceLiveBridge` drains OSC updates and completed commits.
|
||||
4. `RenderEngine` updates `RuntimeLiveState`.
|
||||
5. `RenderFrameStateResolver` chooses committed states or live snapshot states.
|
||||
6. `RenderStateComposer` applies transient overlay values.
|
||||
7. `RenderEngine::RenderPreparedFrame(...)` consumes `RenderFrameState`.
|
||||
|
||||
That is a good Phase 3/4 foundation. Phase 5 should make the hidden assumptions in steps 5 and 6 explicit enough that reset/reload/preset and future UI automation behavior are not scattered across those collaborators.
|
||||
|
||||
## Proposed Collaborators
|
||||
|
||||
### `RuntimeStateLayerModel`
|
||||
|
||||
Optional pure model that names the layers and composition metadata.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- represent base, committed, and transient layer state inputs
|
||||
- define precedence and invalidation categories
|
||||
- expose a pure composition function or input object
|
||||
- keep GL, services, persistence, and device callbacks out of the model
|
||||
|
||||
Non-responsibilities:
|
||||
|
||||
- disk IO
|
||||
- OSC socket handling
|
||||
- render-thread scheduling
|
||||
- shader compilation
|
||||
|
||||
This may be a small set of structs rather than a large class. The value is in naming the contract.
|
||||
|
||||
### `CommittedLiveState`
|
||||
|
||||
Runtime/session collaborator for committed current-session state that has moved out of `RuntimeStore` physical ownership.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- hold operator/UI committed values that are true for the current session
|
||||
- distinguish persistence-required commits from session-only commits
|
||||
- expose a read model for snapshot publication
|
||||
- provide reset/load behavior separate from durable storage
|
||||
|
||||
Non-responsibilities:
|
||||
|
||||
- transient OSC smoothing
|
||||
- disk writes
|
||||
- GL resources
|
||||
|
||||
Phase 5 now uses this physical split. `RuntimeStore` still wraps it for compatibility and persistence IO, but committed values no longer live directly as store fields.
|
||||
|
||||
### `AutomationOverlayState`
|
||||
|
||||
Possible evolution of `RuntimeLiveState`.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- hold transient automation values keyed by route/layer/parameter identity
|
||||
- track generation, commit-in-flight, and completion
|
||||
- apply smoothing and settle policy
|
||||
- decide whether an overlay is render-only, commit-requesting, stale, or invalidated
|
||||
|
||||
Non-responsibilities:
|
||||
|
||||
- owning committed truth
|
||||
- persistent state mutation
|
||||
- snapshot publication
|
||||
|
||||
This can start by renaming or narrowing current `RuntimeLiveState` responsibilities rather than replacing it outright.
|
||||
|
||||
### `LayeredStateComposer`
|
||||
|
||||
Possible evolution of `RenderStateComposer`.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- apply the target precedence rule
|
||||
- produce final `RuntimeRenderState` values for a frame
|
||||
- return commit requests or overlay observations when policy says a transient value settled
|
||||
- keep value composition testable without OpenGL
|
||||
|
||||
Non-responsibilities:
|
||||
|
||||
- frame rendering
|
||||
- service queue draining
|
||||
- storage mutation
|
||||
|
||||
## Layering Rules
|
||||
|
||||
### Precedence
|
||||
|
||||
Default precedence should be:
|
||||
|
||||
1. base persisted/snapshot value
|
||||
2. committed live/session value
|
||||
3. transient automation overlay
|
||||
|
||||
The topmost valid layer wins for discrete values. Numeric/vector values may be smoothed by overlay policy before they win.
|
||||
|
||||
### Identity
|
||||
|
||||
Layering should use stable render-facing identity:
|
||||
|
||||
- layer id for persisted structural identity
|
||||
- layer key/control key for OSC-facing identity
|
||||
- parameter id for shader-defined identity
|
||||
- parameter control key for external-control identity
|
||||
|
||||
Current policy treats render-facing layer identity plus parameter/control-key compatibility as authoritative. Incompatible transient overlays are pruned before composition, so stale OSC routes do not migrate onto unrelated controls after layer removal, preset load, shader change, or incompatible reload.
|
||||
|
||||
### Invalidations
|
||||
|
||||
The following should have explicit behavior:
|
||||
|
||||
- layer removed: clear committed and transient state for that layer
|
||||
- layer shader changed: clear or remap parameter overlays according to compatible control keys
|
||||
- preset loaded: replace base/committed state and clear incompatible transient overlays
|
||||
- shader reload with same controls: preserve compatible transient overlays where safe
|
||||
- manual reset parameters: clear committed overrides and transient overlays for that layer
|
||||
- no-input/source changes: should not affect parameter layers
|
||||
|
||||
### Commit Policy
|
||||
|
||||
Transient automation may:
|
||||
|
||||
- remain render-only
|
||||
- settle and request a committed mutation
|
||||
- commit without persistence
|
||||
- commit with persistence only when the control path explicitly requests it
|
||||
|
||||
The policy should be explicit per ingress path or parameter category. Phase 5 does not need a full UI for it, but the default behavior should be documented and tested.
|
||||
|
||||
## Event And Snapshot Contract
|
||||
|
||||
Phase 5 should clarify which changes publish which effects:
|
||||
|
||||
| Change | Snapshot publication | Persistence request | Render reset | Runtime event |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| persisted layer stack mutation | yes | yes | maybe | accepted mutation + persistence requested |
|
||||
| operator live parameter change | yes | maybe | no, unless structural | accepted mutation |
|
||||
| transient OSC overlay update | no committed snapshot by default | no | no | optional overlay observation |
|
||||
| overlay settled commit | yes if accepted | usually no for OSC | no | accepted mutation or overlay-settled observation |
|
||||
| preset load | yes | maybe | temporal/feedback policy dependent | accepted mutation + reload/reset observations |
|
||||
| shader change/reload | yes after build | maybe | temporal/feedback policy dependent | shader build/reload events |
|
||||
|
||||
This table should evolve with implementation, but Phase 5 should prevent transient overlay updates from masquerading as durable committed state.
|
||||
|
||||
## Migration Plan
|
||||
|
||||
### Step 1. Inventory Current State Layers
|
||||
|
||||
Document and/or encode where each current state category lives:
|
||||
|
||||
- persisted layer stack and parameter values
|
||||
- committed current-session parameter values
|
||||
- runtime compile/reload flags
|
||||
- transient OSC overlays
|
||||
- render-local temporal history and feedback state
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] identify which fields are durable, committed-live, transient automation, render-local, or health/config
|
||||
- [x] update subsystem docs where the current ownership is misleading
|
||||
- [x] add small tests for classification if a pure helper exists
|
||||
|
||||
### Step 2. Name The Layered Composition Input
|
||||
|
||||
Introduce a named composition input model around the previous `RenderStateCompositionInput`.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] make base/committed/transient inputs visible in type names or field names
|
||||
- [x] keep `RenderStateComposer` behavior unchanged at first
|
||||
- [x] add tests that assert precedence with no GL
|
||||
|
||||
Possible outcomes:
|
||||
|
||||
- [x] add a new `LayeredRenderStateInput`
|
||||
- [x] no adapter was needed; callers now use the layered input shape directly
|
||||
|
||||
### Step 3. Make Reset And Reload Policy Explicit
|
||||
|
||||
Move reset/reload transient-state decisions into one policy point.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] layer removal clears matching transient overlays
|
||||
- [x] shader change clears incompatible overlays
|
||||
- [x] preset load clears incompatible overlays
|
||||
- [x] shader reload can preserve compatible overlays when requested
|
||||
- [x] temporal/feedback resets stay render-local and separate from parameter overlays
|
||||
|
||||
This is where Phase 5 should prevent "clear everything" and "preserve everything" from being scattered through unrelated code.
|
||||
|
||||
Current implementation:
|
||||
|
||||
- `RuntimeCoordinatorResult` carries a named `RuntimeCoordinatorTransientOscInvalidation` request rather than a raw clear-all flag.
|
||||
- `RuntimeUpdateController` applies layer-scoped invalidation to both render-owned overlay state and queued OSC service state.
|
||||
- `RuntimeLiveState::PruneIncompatibleOverlays(...)` is the central compatibility policy for current render-facing layer/parameter definitions.
|
||||
- `RuntimeLiveState::ApplyToLayerStates(...)` prunes incompatible overlays before applying transient values, so shader changes, preset loads, and layer removals stop carrying stale overlays once the current frame state no longer maps them.
|
||||
|
||||
### Step 4. Clarify OSC Commit Semantics
|
||||
|
||||
Make the transient-to-committed path explicit.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] document and test whether settled OSC commits persist
|
||||
- [x] ensure stale generation completions are ignored
|
||||
- [x] ensure one settled route does not clear unrelated overlay state
|
||||
- [x] publish or preserve useful events for accepted overlay commits
|
||||
|
||||
Current Phase 3 behavior is a good base; Phase 5 should make the policy easier to reason about from the code.
|
||||
|
||||
Current policy:
|
||||
|
||||
- settled OSC commits are `RuntimeCoordinatorOscCommitPersistence::SessionOnly` by default
|
||||
- accepted settled OSC commits update the committed session value through `RuntimeStore::SetStoredParameterValue(..., persistState = false, ...)`
|
||||
- accepted settled OSC commits publish runtime mutation/state-change observations, but no `RuntimePersistenceRequested` event
|
||||
- accepted service-side commit completions publish `OscOverlaySettled`
|
||||
- stale generation completions are ignored by `RuntimeLiveState::ApplyOscCommitCompletions(...)`
|
||||
- unrelated routes remain untouched when a different route settles or completes
|
||||
|
||||
### Step 5. Separate Committed-Live Concept From Durable Storage
|
||||
|
||||
Separate the committed-live concept from durable storage with both a physical owner and a read/model boundary.
|
||||
|
||||
Earlier conservative option:
|
||||
|
||||
- [x] add a named committed-live read model
|
||||
- [x] keep persistence decisions in `RuntimeCoordinator`
|
||||
|
||||
Stronger option:
|
||||
|
||||
- [x] introduce `CommittedLiveState`
|
||||
- [x] make `RuntimeSnapshotProvider` consume committed live state through a read model
|
||||
- [x] leave durable writes in `RuntimeStore`
|
||||
|
||||
The implementation now has the stronger split while keeping `RuntimeStore` as the compatibility facade for existing callers.
|
||||
|
||||
Current implementation:
|
||||
|
||||
- `CommittedLiveState` physically owns the current committed/session layer stack.
|
||||
- `CommittedLiveStateReadModel` carries the current committed/session layer stack and shader package metadata used by snapshot publication.
|
||||
- `RenderSnapshotReadModel` contains `committedLiveState` rather than exposing layer-stack fields directly.
|
||||
- `RenderSnapshotBuilder` builds render snapshots and parameter refreshes from committed-live read APIs.
|
||||
- `RuntimeStore` still owns config, package metadata, disk IO, preset files, and persistent-state serialization, but delegates current-session layer mutations to `CommittedLiveState`.
|
||||
|
||||
### Step 6. Update Docs And Exit Criteria
|
||||
|
||||
Before calling Phase 5 complete, update:
|
||||
|
||||
- [x] architecture review checklist
|
||||
- [x] `RuntimeCoordinator`, `RuntimeStore`, `RuntimeSnapshotProvider`, `RenderEngine`, and `ControlServices` subsystem docs
|
||||
- [x] Phase 6 assumptions about persistence inputs
|
||||
- [x] Phase 7 assumptions about what render/backend state is not part of live parameter layering
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
Phase 5 tests should avoid GL, DeckLink, sockets, and filesystem writes where possible.
|
||||
|
||||
Recommended tests:
|
||||
|
||||
- base value is used when no committed or transient value exists
|
||||
- committed value overrides base value
|
||||
- transient overlay overrides committed value
|
||||
- numeric smoothing applies only to transient overlay values
|
||||
- trigger/bool/discrete overlay behavior is explicit
|
||||
- layer removal clears matching transient state
|
||||
- shader change preserves only compatible overlays if policy allows
|
||||
- preset load clears or replaces committed/transient state according to policy
|
||||
- settled OSC overlay creates the expected commit request
|
||||
- settled OSC commit does not request persistence unless policy says so
|
||||
- stale commit completion does not clear a newer overlay
|
||||
- render-local temporal/feedback resets do not mutate parameter layers
|
||||
|
||||
Existing useful homes:
|
||||
|
||||
- `RuntimeLiveStateTests` for overlay generation, smoothing, settle, and invalidation behavior
|
||||
- `RuntimeSubsystemTests` for coordinator mutation, persistence request, and reset/reload policy
|
||||
- `RuntimeEventTypeTests` for any new observations or accepted mutation events
|
||||
- a possible new `RuntimeStateLayeringTests` target if the composition model gets a pure helper
|
||||
|
||||
## Risks
|
||||
|
||||
### Over-Abstraction Risk
|
||||
|
||||
It would be easy to introduce too many state containers. Phase 5 should add names where they clarify behavior, not create an elaborate framework.
|
||||
|
||||
### Persistence Confusion Risk
|
||||
|
||||
Committed live state and persisted state are related but not identical. If Phase 5 blurs them, Phase 6's background persistence writer will inherit ambiguous inputs.
|
||||
|
||||
### Automation Surprise Risk
|
||||
|
||||
OSC automation can be high-rate and transient, but users may expect settled values to become "real." The commit policy needs to be explicit enough that UI, OSC, presets, and reloads behave predictably.
|
||||
|
||||
### Identity/Compatibility Risk
|
||||
|
||||
Shader changes and preset loads can invalidate layer/parameter identities. Phase 5 should prefer conservative clearing over accidental application of an old automation value to the wrong control.
|
||||
|
||||
### Render Coupling Risk
|
||||
|
||||
Render-local resources such as temporal history, feedback buffers, readback caches, and playout queues are not parameter layers. Keeping them out of this model avoids turning Phase 5 into a render-resource refactor.
|
||||
|
||||
## Phase 5 Exit Criteria
|
||||
|
||||
Phase 5 can be considered complete once the project can say:
|
||||
|
||||
- [x] persisted, committed-live, and transient automation layers are named in code or clear read models
|
||||
- [x] final render-value precedence is explicit and covered by tests
|
||||
- [x] `RenderStateComposer` or its replacement consumes a layered input contract
|
||||
- [x] reset/reload/preset behavior for transient overlays is centralized or clearly delegated
|
||||
- [x] OSC overlay settle/commit behavior is explicit, including persistence policy
|
||||
- [x] `RuntimeStore` remains durable-state focused and does not absorb transient automation policy
|
||||
- [x] render-local temporal/feedback state remains separate from live parameter layering
|
||||
- [x] subsystem docs and the architecture review reflect the final ownership model
|
||||
|
||||
## Open Questions
|
||||
|
||||
- Should transient OSC overlay updates become app-level typed events, or stay source-local through `RuntimeServiceLiveBridge`?
|
||||
- Should overlay commit persistence be global, ingress-specific, or parameter-definition-driven?
|
||||
- What compatibility rule should apply when shader reload preserves a control key but changes parameter shape?
|
||||
- Should preset load clear all transient automation, or only automation that no longer maps to the loaded stack?
|
||||
- Should UI slider drags use the committed-live layer directly, or a short-lived transient layer that commits on release?
|
||||
|
||||
## Short Version
|
||||
|
||||
Phase 5 should make live values boring and explicit.
|
||||
|
||||
Persisted state is durable truth. Committed live state is current-session/operator truth. Transient automation is high-rate overlay truth. Render consumes the composed result, and each layer has clear ownership, lifetime, persistence behavior, and reset/reload rules.
|
||||
@@ -1,346 +0,0 @@
|
||||
# Phase 6 Design: Background Persistence
|
||||
|
||||
This document expands Phase 6 of [ARCHITECTURE_RESILIENCE_REVIEW.md](ARCHITECTURE_RESILIENCE_REVIEW.md) into a concrete design target.
|
||||
|
||||
Phases 1-5 separate durable state, coordination policy, render-facing snapshots, render-thread ownership, and live-state layering. Phase 6 should make disk persistence a background snapshot-writing concern instead of a synchronous side effect of mutations.
|
||||
|
||||
## Status
|
||||
|
||||
- Phase 6 design package: complete.
|
||||
- Phase 6 implementation: Step 6 complete.
|
||||
- Current alignment: `RuntimeStore` owns durable serialization, config, package metadata, preset IO, and persistence request execution; `CommittedLiveState` owns the current committed/session layer state; and `RuntimeCoordinator` publishes typed persistence requests for persisted mutations. Runtime-state persistence is now requested through the coordinator/event path and executed by the background writer.
|
||||
|
||||
Current persistence footholds:
|
||||
|
||||
- `RuntimeStore` owns persistent runtime-state serialization, stack preset serialization, and durable file IO.
|
||||
- `CommittedLiveState` owns current committed/session layer and parameter state.
|
||||
- `RuntimeCoordinatorResult::persistenceRequested` exists as an explicit mutation outcome.
|
||||
- `RuntimeEventType::RuntimePersistenceRequested` now carries a `PersistenceRequest`.
|
||||
- `PersistenceRequest` and `PersistenceSnapshot` name the request/snapshot contract that later steps will hand to the writer.
|
||||
- Phase 5 clarified which live-state mutations are durable, committed-live, transient automation, or render-local. Settled OSC commits are session-only by default and do not request persistence.
|
||||
|
||||
## Why Phase 6 Exists
|
||||
|
||||
Synchronous persistence is a poor fit for live software. A mutation that changes state should not also have to block on filesystem timing, antivirus scans, slow disks, or transient IO failures. The app needs persistence to be reliable and observable, but not timing-sensitive.
|
||||
|
||||
The resilience review calls this out because synchronous save-after-mutate behavior can create unnecessary stalls and makes recovery harder to reason about.
|
||||
|
||||
Phase 6 should turn persistence into:
|
||||
|
||||
- request
|
||||
- snapshot
|
||||
- background write
|
||||
- completion/failure observation
|
||||
|
||||
## Goals
|
||||
|
||||
Phase 6 should establish:
|
||||
|
||||
- a queued persistence request path
|
||||
- debounced/coalesced durable-state snapshot writes
|
||||
- atomic file replacement for runtime-state saves where practical
|
||||
- structured completion/failure reporting
|
||||
- clear separation between state mutation and disk flush
|
||||
- deterministic shutdown flushing policy
|
||||
- tests for coalescing, snapshot selection, write failure, and shutdown behavior without rendering or DeckLink
|
||||
|
||||
## Non-Goals
|
||||
|
||||
Phase 6 should not require:
|
||||
|
||||
- changing live-state layering rules
|
||||
- changing DeckLink/backend lifecycle
|
||||
- replacing stack preset semantics wholesale
|
||||
- adding cloud sync or external storage
|
||||
- building an unlimited historical state archive
|
||||
- making every write async immediately if a narrow compatibility path still needs a synchronous result
|
||||
|
||||
## Target Model
|
||||
|
||||
Phase 6 should make persistence a small pipeline:
|
||||
|
||||
```text
|
||||
RuntimeCoordinator accepts mutation
|
||||
-> publishes/returns persistence request
|
||||
-> PersistenceWriter captures a durable snapshot from RuntimeStore serialization
|
||||
-> background worker debounces/coalesces writes
|
||||
-> atomic write commits file
|
||||
-> HealthTelemetry/runtime event records success or failure
|
||||
```
|
||||
|
||||
The key rule is:
|
||||
|
||||
- `RuntimeStore` owns durable state and serialization
|
||||
- `CommittedLiveState` owns current session state; only coordinator-approved durable snapshots should be persisted
|
||||
- `PersistenceWriter` owns when and how snapshots are written
|
||||
- `RuntimeCoordinator` owns whether a mutation requests persistence
|
||||
|
||||
## Proposed Collaborators
|
||||
|
||||
### `PersistenceWriter`
|
||||
|
||||
Owns the worker thread, queue, debounce timer, and write execution.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- accept persistence requests
|
||||
- coalesce repeated runtime-state writes
|
||||
- request/build a durable snapshot from `RuntimeStore`
|
||||
- write to a temporary file and atomically replace the target
|
||||
- report success/failure observations
|
||||
- flush on shutdown according to policy
|
||||
|
||||
Non-responsibilities:
|
||||
|
||||
- deciding mutation validity
|
||||
- owning durable in-memory state
|
||||
- composing render snapshots
|
||||
- blocking render/backend timing paths
|
||||
|
||||
### `PersistenceSnapshot`
|
||||
|
||||
Immutable write input captured from durable state.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- contain serialized runtime-state text or structured data ready to serialize
|
||||
- identify target path and snapshot generation
|
||||
- preserve enough metadata for completion/failure diagnostics
|
||||
|
||||
Non-responsibilities:
|
||||
|
||||
- mutation policy
|
||||
- file IO
|
||||
|
||||
### `PersistenceRequest`
|
||||
|
||||
Small request object or event payload.
|
||||
|
||||
Expected fields:
|
||||
|
||||
- reason/action name
|
||||
- target kind: runtime state, preset, config if later needed
|
||||
- optional debounce key
|
||||
- force/flush flag for explicit save operations
|
||||
- generation or sequence
|
||||
|
||||
## Write Policy
|
||||
|
||||
### Runtime State
|
||||
|
||||
Default policy:
|
||||
|
||||
- coalesce repeated requests
|
||||
- debounce short bursts
|
||||
- write newest snapshot
|
||||
- report failures without blocking render/control paths
|
||||
|
||||
### Stack Presets
|
||||
|
||||
Preset save is more operator-explicit than routine runtime-state persistence.
|
||||
|
||||
Initial policy options:
|
||||
|
||||
- keep preset save synchronous while runtime-state persistence becomes async
|
||||
- or route preset writes through the same worker with a completion result for the caller
|
||||
|
||||
Conservative Phase 6 default:
|
||||
|
||||
- background runtime-state persistence first
|
||||
- leave preset save/load synchronous unless the implementation has a clean completion path
|
||||
|
||||
### Shutdown
|
||||
|
||||
Shutdown should explicitly decide:
|
||||
|
||||
- flush latest pending snapshot before exit
|
||||
- skip flush if no pending durable change exists
|
||||
- report/write failure if flush fails
|
||||
- avoid indefinite hang on shutdown
|
||||
|
||||
## Atomicity And Failure Handling
|
||||
|
||||
Runtime-state writes should prefer:
|
||||
|
||||
1. serialize snapshot content in memory
|
||||
2. write to `target.tmp`
|
||||
3. flush/close file
|
||||
4. replace target atomically where platform support allows
|
||||
5. retain or report backup/failure context if replacement fails
|
||||
|
||||
Failures should not silently disappear. They should publish:
|
||||
|
||||
- persistence target
|
||||
- reason/action
|
||||
- error message
|
||||
- whether a newer request is pending
|
||||
- whether the app is still running with unsaved changes
|
||||
|
||||
## Migration Plan
|
||||
|
||||
### Step 1. Name Persistence Requests
|
||||
|
||||
Make request types and event payloads explicit enough that callers stop thinking in terms of direct disk writes.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] keep existing coordinator persistence decisions
|
||||
- [x] introduce a `PersistenceRequest`/`PersistenceSnapshot` shape
|
||||
- [x] document which requests are debounceable
|
||||
|
||||
Current implementation:
|
||||
|
||||
- `runtime/persistence/PersistenceRequest.h` defines `PersistenceTargetKind`, `PersistenceRequest`, and `PersistenceSnapshot`.
|
||||
- `RuntimePersistenceRequestedEvent` carries a typed `PersistenceRequest`.
|
||||
- `RuntimeCoordinator` emits runtime-state persistence requests with reason, debounce key, and debounce policy.
|
||||
- Existing synchronous save behavior is intentionally unchanged until Step 2/3.
|
||||
|
||||
### Step 2. Extract Snapshot Writing From `RuntimeStore`
|
||||
|
||||
Move file-write mechanics behind a helper while keeping serialization ownership in `RuntimeStore`.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] `RuntimeStore` can build serialized runtime-state snapshots
|
||||
- [x] `PersistenceWriter` writes the snapshot
|
||||
- [x] existing synchronous save path can call through the writer/helper during transition
|
||||
|
||||
Current implementation:
|
||||
|
||||
- `RuntimeStore::BuildRuntimeStatePersistenceSnapshot(...)` captures serialized runtime-state content and target path.
|
||||
- `PersistenceWriter::WriteSnapshot(...)` owns the temp-file and replace write mechanics.
|
||||
- Runtime-state persistence now flows through `RuntimeStore::RequestPersistence(...)` and the background writer.
|
||||
- Stack preset saves still use `PersistenceWriter` synchronously; preset async policy remains a later decision.
|
||||
|
||||
### Step 3. Add Debounced Background Worker
|
||||
|
||||
Introduce a worker thread or queued task owner.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] repeated runtime-state requests coalesce
|
||||
- [x] worker writes only latest pending snapshot
|
||||
- [x] tests cover coalescing without filesystem where possible
|
||||
|
||||
Current implementation:
|
||||
|
||||
- `PersistenceWriter::EnqueueSnapshot(...)` starts a worker lazily and debounces snapshots by `debounceKey`.
|
||||
- Runtime-state saves enqueue debounced snapshots, so routine mutation paths no longer write the runtime-state file directly.
|
||||
- Synchronous `PersistenceWriter::WriteSnapshot(...)` remains for stack preset saves.
|
||||
- `PersistenceWriterTests` use an injected in-memory sink to verify coalescing and non-coalesced immediate requests without touching the filesystem.
|
||||
|
||||
### Step 4. Add Atomic Write And Failure Reporting
|
||||
|
||||
Make disk writes safer and observable.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] temp-file then replace
|
||||
- [x] failure returned/published with structured reason
|
||||
- [x] `HealthTelemetry` receives persistence warning state
|
||||
|
||||
Current implementation:
|
||||
|
||||
- `PersistenceWriter::WriteSnapshot(...)` and worker writes use temp-file then `MoveFileExA(..., MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)`.
|
||||
- `PersistenceWriteResult` reports target kind, target path, reason, success/failure, error message, and whether newer work was pending.
|
||||
- `RuntimeStore` wires persistence write results into `HealthTelemetry`.
|
||||
- `HealthTelemetry` records persistence success/failure counts, last target/reason/error, pending-newer-request state, and unsaved-change state.
|
||||
|
||||
### Step 5. Wire Coordinator/Event Requests To Writer
|
||||
|
||||
Route `RuntimePersistenceRequested` or coordinator persistence outcomes into the writer.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] accepted durable mutations request persistence
|
||||
- [x] transient-only mutations do not
|
||||
- [x] runtime reload/preset policies remain explicit
|
||||
|
||||
Current implementation:
|
||||
|
||||
- Store mutation methods update committed durable/session state and mark render state dirty, but no longer enqueue runtime-state writes directly.
|
||||
- `RuntimeCoordinator` remains the owner of the persistence decision and publishes `RuntimePersistenceRequested` only for accepted durable mutations.
|
||||
- `RuntimeUpdateController` handles `RuntimePersistenceRequested` and calls `RuntimeStore::RequestPersistence(...)`.
|
||||
- `RuntimeStore::RequestPersistence(...)` validates the request target, builds the runtime-state snapshot, enqueues it on `PersistenceWriter`, and records enqueue failures in `HealthTelemetry`.
|
||||
- Stack preset save remains a synchronous preset-file write; preset load updates state and relies on the coordinator persistence request for runtime-state persistence.
|
||||
|
||||
### Step 6. Define Shutdown Flush
|
||||
|
||||
Make app shutdown persistence behavior deterministic.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] stop accepting new requests
|
||||
- [x] flush latest pending snapshot with bounded wait
|
||||
- [x] report failure if flush fails
|
||||
|
||||
Current implementation:
|
||||
|
||||
- `PersistenceWriter::StopAndFlush(timeout, error)` stops accepting new snapshots, forces debounced snapshots ready, drains pending work, and reports timeout/failure to the caller.
|
||||
- `RuntimeStore::FlushPersistenceForShutdown(...)` provides the runtime-level shutdown API and records flush failures in `HealthTelemetry`.
|
||||
- `OpenGLComposite::Stop()` and the destructor explicitly flush persistence after control services/backend/render-thread shutdown.
|
||||
- `PersistenceWriterTests` cover shutdown draining, request rejection after shutdown, and timeout/retry behavior without rendering or DeckLink.
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
Recommended tests:
|
||||
|
||||
- repeated persistence requests coalesce into one write
|
||||
- newest snapshot wins after multiple mutations
|
||||
- transient-only mutation does not request persistence
|
||||
- write failure records an error and keeps unsaved state visible
|
||||
- shutdown flush writes pending snapshot
|
||||
- shutdown with no pending request does not write
|
||||
- preset save path remains explicit
|
||||
- temp-file replacement success/failure is handled
|
||||
|
||||
Useful homes:
|
||||
|
||||
- `RuntimeSubsystemTests` for coordinator persistence outcomes
|
||||
- a new `PersistenceWriterTests` target for worker/coalescing/write policy
|
||||
- filesystem tests using a temporary directory for atomic write behavior
|
||||
|
||||
## Risks
|
||||
|
||||
### Data Loss Risk
|
||||
|
||||
Debouncing introduces a window where in-memory state is newer than disk. Shutdown flush and unsaved-state telemetry are the guardrails.
|
||||
|
||||
### Complexity Risk
|
||||
|
||||
A persistence worker can become a hidden second store if it owns mutable truth. It should own snapshots and write policy only.
|
||||
|
||||
### Blocking Shutdown Risk
|
||||
|
||||
Flushing forever on shutdown is not acceptable. Use bounded waits and visible failure reporting.
|
||||
|
||||
### Preset Semantics Risk
|
||||
|
||||
Operator-triggered preset save often feels like it should complete before reporting success. Keep preset behavior explicit rather than silently changing it.
|
||||
|
||||
## Phase 6 Exit Criteria
|
||||
|
||||
Phase 6 can be considered complete once the project can say:
|
||||
|
||||
- [x] durable mutations enqueue persistence instead of directly writing from mutation paths
|
||||
- [x] runtime-state writes are debounced/coalesced
|
||||
- [x] writes use temp-file/replace or equivalent atomic policy
|
||||
- [x] persistence failures are reported through structured health/events
|
||||
- [x] transient/live-only mutations do not request persistence
|
||||
- [x] shutdown flush behavior is explicit and tested
|
||||
- [x] `RuntimeStore` remains durable-state/serialization owner, not worker policy owner
|
||||
- [x] persistence behavior has focused non-render tests
|
||||
|
||||
## Open Questions
|
||||
|
||||
- Should preset save remain synchronous, or move behind a completion-based async request?
|
||||
- What debounce interval is appropriate for routine runtime-state writes?
|
||||
- Should failed persistence retry automatically, or wait for the next mutation/request?
|
||||
- Should the app expose "unsaved changes" in the UI/health snapshot?
|
||||
- Should runtime config writes share this worker, or stay separate?
|
||||
|
||||
## Short Version
|
||||
|
||||
Phase 6 should make persistence boring, safe, and off the hot path.
|
||||
|
||||
Mutations update in-memory durable state. Persistence requests are queued and coalesced. A background writer saves atomic snapshots and reports failures. Render, backend callbacks, and control ingress should not pay filesystem costs.
|
||||
@@ -1,348 +0,0 @@
|
||||
# Phase 7.5 Design: Proactive Playout Timing
|
||||
|
||||
This document summarizes the timing-specific findings from [ARCHITECTURE_RESILIENCE_REVIEW.md](ARCHITECTURE_RESILIENCE_REVIEW.md) and turns them into a focused bridge phase after [PHASE_7_BACKEND_LIFECYCLE_PLAYOUT_DESIGN.md](PHASE_7_BACKEND_LIFECYCLE_PLAYOUT_DESIGN.md).
|
||||
|
||||
Phase 7 made backend lifecycle, playout policy, ready-frame queueing, late/drop recovery, and backend playout health explicit. Phase 7.5 should use those foundations to move output production from demand-filled scheduling toward proactive, deadline-aware playout.
|
||||
|
||||
## Status
|
||||
|
||||
- Phase 7.5 design package: proposed.
|
||||
- Phase 7.5 implementation: Step 5 in progress.
|
||||
- Current alignment: Phase 7 is complete. `RenderOutputQueue`, `VideoPlayoutPolicy`, `VideoPlayoutScheduler`, `VideoBackendLifecycle`, and backend playout telemetry exist. The backend worker fills the ready queue on completion demand, but render production is not yet proactively driven by queue pressure or video cadence.
|
||||
|
||||
Current footholds:
|
||||
|
||||
- `RenderEngine` owns normal GL work on the render thread.
|
||||
- `VideoBackend` owns backend lifecycle, completion processing, ready-frame queue use, and backend playout health reporting.
|
||||
- `RenderOutputQueue` reports depth, capacity, pushed, popped, dropped, and underrun counts.
|
||||
- `VideoPlayoutPolicy` names ready-frame headroom and catch-up policy.
|
||||
- `HealthTelemetry::BackendPlayoutSnapshot` exposes queue depth, underruns, late/drop streaks, and recovery decisions.
|
||||
- Step 1 adds baseline timing fields for ready-queue min/max/zero-depth samples and output render duration.
|
||||
- Step 2 adds a pure `OutputProductionController` for queue-pressure production decisions.
|
||||
- Step 3 adds a proactive output producer worker that keeps `RenderOutputQueue` warm after playback starts.
|
||||
- Step 4 skips non-forced preview presentation while output ready-queue depth is below target.
|
||||
- Step 5 makes async readback misses prefer cached output over synchronous readback after bootstrap.
|
||||
|
||||
## Timing Review Findings
|
||||
|
||||
The resilience review highlights several timing risks that remain after basic render-thread and backend ownership cleanup:
|
||||
|
||||
- playout is still effectively filled on demand instead of continuously produced ahead
|
||||
- output buffering is named, but queue depth is not yet tuned against measured render/readback cost
|
||||
- GPU readback has an asynchronous path, but the miss path can still fall back to synchronous readback
|
||||
- preview presentation is best-effort, but it still shares render-thread budget with playout
|
||||
- telemetry is improving, but render timing is still too coarse to distinguish draw, pack, fence wait, readback copy, and preview cost
|
||||
|
||||
The practical concern is not average frame time. It is what happens during a short spike. A single slow render, readback wait, preview present, or callback scheduling delay can drain playout headroom and cause late or dropped output frames.
|
||||
|
||||
## Why Phase 7.5 Exists
|
||||
|
||||
Phase 7 made the backend safer and observable, but Step 5 intentionally stopped at demand-filled queue behavior:
|
||||
|
||||
- a completion arrives
|
||||
- the backend worker fills the ready queue to target depth
|
||||
- the backend schedules one ready frame
|
||||
|
||||
That is better than callback-thread rendering, but it still couples frame production to output completion pressure. Phase 7.5 should make render production proactive:
|
||||
|
||||
- keep the ready queue near target depth before the device asks for the next frame
|
||||
- let DeckLink consume already-prepared frames
|
||||
- treat queue depth as the pressure signal between render and backend
|
||||
- make preview and readback fallback subordinate to output deadlines
|
||||
|
||||
## Goals
|
||||
|
||||
Phase 7.5 should establish:
|
||||
|
||||
- a proactive output producer that fills `RenderOutputQueue` based on queue pressure
|
||||
- a clear trigger model for output production: queue-low, cadence tick, or both
|
||||
- a bounded sleep/yield strategy when the ready queue is full
|
||||
- explicit priority rules between playout, preview, screenshots, shader work, and background render requests
|
||||
- readback miss behavior that does not blindly return to the most timing-sensitive synchronous path
|
||||
- telemetry that can explain why the queue drains: render cost, readback wait, preview cost, or scheduling pressure
|
||||
- pure tests for producer pressure policy where possible
|
||||
|
||||
## Non-Goals
|
||||
|
||||
Phase 7.5 should not require:
|
||||
|
||||
- replacing the renderer
|
||||
- replacing DeckLink support
|
||||
- a full telemetry subsystem rewrite
|
||||
- perfect adaptive latency
|
||||
- a new UI
|
||||
- changing live-state layering or persistence semantics
|
||||
|
||||
This phase is about output timing behavior, not broad subsystem redesign.
|
||||
|
||||
## Target Timing Model
|
||||
|
||||
The target model is:
|
||||
|
||||
```text
|
||||
Video cadence / queue pressure
|
||||
-> proactive output producer request
|
||||
-> RenderEngine renders and reads back output frame
|
||||
-> RenderOutputQueue stores ready frame
|
||||
-> VideoBackend consumes ready frame for DeckLink scheduling
|
||||
```
|
||||
|
||||
The important difference from Phase 7 is that output production should not wait until a completion has already created demand. The queue should usually have headroom before the completion worker needs to schedule.
|
||||
|
||||
Suggested pressure rules:
|
||||
|
||||
- if ready depth is below `targetReadyFrames`, request output production immediately
|
||||
- if ready depth is at or above `maxReadyFrames`, producer sleeps or yields
|
||||
- if late/drop streak grows, temporarily bias toward output production over preview
|
||||
- if readback is late, prefer stale/black underrun policy over blocking the deadline path
|
||||
- if preview is due but output queue is below target, skip or delay preview
|
||||
|
||||
## Proposed Collaborators
|
||||
|
||||
### `OutputProductionController`
|
||||
|
||||
Small policy owner that decides when to request another output frame.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- evaluate ready queue depth and capacity
|
||||
- evaluate late/drop/underrun pressure
|
||||
- decide whether to produce, sleep, or yield
|
||||
- keep policy testable without DeckLink or GL
|
||||
|
||||
Non-responsibilities:
|
||||
|
||||
- GL rendering
|
||||
- DeckLink scheduling
|
||||
- live-state composition
|
||||
|
||||
### `OutputProducerWorker`
|
||||
|
||||
Worker or render-thread-adjacent loop that keeps output frames ready.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- wake on queue-low pressure
|
||||
- request render-thread output production
|
||||
- push completed frames into `RenderOutputQueue`
|
||||
- stop cleanly before render/backend teardown
|
||||
|
||||
Non-responsibilities:
|
||||
|
||||
- device callback handling
|
||||
- hardware scheduling
|
||||
- persistent state mutation
|
||||
|
||||
### `RenderTimingBreakdown`
|
||||
|
||||
Lightweight render timing sample for the output path.
|
||||
|
||||
Initial fields:
|
||||
|
||||
- total output render time
|
||||
- draw/composite time
|
||||
- output pack time
|
||||
- readback fence wait time
|
||||
- readback copy time
|
||||
- synchronous readback fallback count
|
||||
- preview present cost
|
||||
- preview skipped count
|
||||
|
||||
This can be reported into existing telemetry first, then Phase 8 can fold it into the broader health model.
|
||||
|
||||
## Migration Plan
|
||||
|
||||
### Step 1. Snapshot Current Timing Behavior
|
||||
|
||||
Use existing Phase 7 telemetry to capture baseline behavior before changing production cadence.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] record ready queue depth over time while running
|
||||
- [x] record underrun count, late/drop streaks, and catch-up frames
|
||||
- [x] record output render duration and completion interval
|
||||
- [x] identify whether queue depth regularly falls to zero
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- [x] there is a clear before/after baseline for proactive production
|
||||
- [x] runtime-state output exposes enough values to diagnose whether queue starvation is happening
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- `HealthTelemetry::BackendPlayoutSnapshot` exposes current, min, max, and zero-depth ready-queue samples.
|
||||
- `VideoBackend` samples ready-queue depth before demand-fill, after queue fill, and after scheduling from the queue.
|
||||
- `VideoBackend` records last, smoothed, and max output render duration for demand-produced output frames.
|
||||
- Runtime-state JSON exposes the baseline under `backendPlayout.readyQueue` and `backendPlayout.outputRender`.
|
||||
|
||||
### Step 2. Extract Output Production Policy
|
||||
|
||||
Introduce a pure policy helper for queue-pressure decisions.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] input: ready depth, capacity, target depth, late/drop streaks, underrun count
|
||||
- [x] output: produce, wait, or throttle
|
||||
- [x] tests cover low queue, full queue, late/drop pressure, and normalized policy values
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- [x] production cadence policy can evolve without touching DeckLink or GL code
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- `OutputProductionController` lives in `videoio` and depends only on `VideoPlayoutPolicy`.
|
||||
- `OutputProductionPressure` carries ready-queue depth/capacity plus underrun and late/drop pressure.
|
||||
- `OutputProductionDecision` returns `Produce`, `Wait`, or `Throttle`, a requested frame count, effective target/max limits, and a reason string.
|
||||
- Step 2 is intentionally not wired into live playback yet. Step 3 should use this policy to drive the proactive producer loop.
|
||||
|
||||
### Step 3. Add A Proactive Producer Loop
|
||||
|
||||
Move from demand-filled output production to queue-pressure production.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] producer wakes when queue depth is below target
|
||||
- [x] producer requests render-thread output production until target depth is reached
|
||||
- [x] producer stops when backend stops or render thread shuts down
|
||||
- [x] completion worker mostly schedules from already-ready frames
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- [x] normal playback does not depend on completion processing to fill the queue from empty
|
||||
- [x] callback/completion pressure and render production pressure are separate
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- `VideoBackend` starts the completion worker before device start, then starts the output producer only after DeckLink start succeeds. This avoids fighting DeckLink preroll for the same output frame pool.
|
||||
- `OutputProducerWorkerMain()` periodically wakes and uses `OutputProductionController` to decide whether to produce, wait, or throttle.
|
||||
- Completion handling records pacing/recovery, updates producer pressure, schedules a ready frame, and wakes the producer to refill headroom.
|
||||
- Completion handling keeps a one-frame synchronous fallback when the ready queue is unexpectedly empty, then falls back to black underrun behavior if that also fails.
|
||||
- Producer shutdown is explicit and joined before video output teardown.
|
||||
|
||||
### Step 4. Prioritize Playout Over Preview
|
||||
|
||||
Make preview explicitly subordinate to output playout deadlines.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] skip or delay preview when ready queue depth is below target
|
||||
- [ ] count skipped previews
|
||||
- [ ] record preview present cost separately from output render cost
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- [x] preview cannot drain output headroom invisibly
|
||||
- [ ] runtime telemetry shows preview skips and preview present cost
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- `OpenGLComposite::paintGL(false)` now skips preview presentation when `VideoBackend` reports that the ready queue is below the target depth.
|
||||
- Forced preview paints are still allowed so resize/manual paint behavior remains intact.
|
||||
- Preview skip counters and present-cost telemetry remain follow-up work for this step.
|
||||
|
||||
### Step 5. Make Readback Miss Policy Deadline-Aware
|
||||
|
||||
Avoid turning a late async readback fence into synchronous deadline pressure by default.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [ ] count async readback misses
|
||||
- [ ] count synchronous fallback uses
|
||||
- [x] allow policy to prefer stale/black output over synchronous fallback when queue pressure is high
|
||||
- [x] keep current fallback available while behavior is measured
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- [x] readback fallback is an explicit policy decision
|
||||
- [x] late GPU fences do not automatically block the most timing-sensitive path
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- `OpenGLRenderPipeline::ReadOutputFrame()` now uses synchronous readback only to bootstrap the first cached output frame.
|
||||
- After cached output exists, an async readback miss copies the cached output frame into the DeckLink output frame instead of blocking on synchronous `glReadPixels`.
|
||||
- Async readback queueing now skips when the next PBO slot is still in flight rather than deleting an in-flight fence and overwriting it.
|
||||
- Miss/fallback counters remain follow-up telemetry work for this step.
|
||||
|
||||
### Step 6. Tune Headroom Policy
|
||||
|
||||
Use measured behavior to choose default queue depth and latency tradeoffs.
|
||||
|
||||
Initial target:
|
||||
|
||||
- compare 30fps and 60fps behavior
|
||||
- tune `targetReadyFrames` and `maxReadyFrames`
|
||||
- document expected latency cost of each default
|
||||
- keep the setting centralized in `VideoPlayoutPolicy`
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- default headroom values are based on observed timing, not guesswork
|
||||
- latency versus resilience tradeoff is documented
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
Recommended tests:
|
||||
|
||||
- production policy requests work when queue is below target
|
||||
- production policy throttles when queue is full
|
||||
- late/drop pressure biases toward production
|
||||
- preview policy skips when output queue is below target
|
||||
- readback miss policy selects stale/black versus synchronous fallback according to pressure
|
||||
- producer shutdown drains or cancels work without touching destroyed render/backend state
|
||||
|
||||
Useful homes:
|
||||
|
||||
- a new `OutputProductionControllerTests`
|
||||
- `RenderOutputQueueTests` for pressure-adjacent queue behavior
|
||||
- `VideoPlayoutSchedulerTests` for recovery/pressure interactions
|
||||
- non-GL fakes for producer loop wake/stop behavior
|
||||
|
||||
## Risks
|
||||
|
||||
### Latency Risk
|
||||
|
||||
More ready frames means more latency. Phase 7.5 should make that latency a visible, measured policy choice.
|
||||
|
||||
### Producer Runaway Risk
|
||||
|
||||
A proactive producer must not spin when the queue is full or when output is stopped.
|
||||
|
||||
### Buffer Ownership Risk
|
||||
|
||||
Ready frames must not be reused while DeckLink or the render path still owns their buffers.
|
||||
|
||||
### Readback Policy Risk
|
||||
|
||||
Stale or black output may be preferable to a missed deadline, but it can be visually obvious. External keying may make stale/black fallback more sensitive.
|
||||
|
||||
### Preview Regression Risk
|
||||
|
||||
Treating preview as subordinate may make desktop preview less smooth. That is acceptable only if playout quality improves and preview skips are visible.
|
||||
|
||||
## Phase 7.5 Exit Criteria
|
||||
|
||||
Phase 7.5 can be considered complete once the project can say:
|
||||
|
||||
- [ ] output production is driven by queue pressure or cadence, not only by completion demand
|
||||
- [ ] completion handling normally schedules already-ready frames
|
||||
- [ ] preview work is explicitly lower priority than playout
|
||||
- [ ] readback miss behavior is explicit and deadline-aware
|
||||
- [ ] queue depth, underruns, render timing, readback misses, and preview skips are visible
|
||||
- [ ] default ready-frame headroom is documented for target frame rates
|
||||
- [ ] production policy has non-DeckLink tests
|
||||
|
||||
## Open Questions
|
||||
|
||||
- Should proactive production be driven by a timer, queue-low notifications, or both?
|
||||
- Should the producer live inside `VideoBackend`, `RenderEngine`, or a small playout controller between them?
|
||||
- Should underrun default to black, last scheduled, or newest completed output once proactive production exists?
|
||||
- How much latency is acceptable at 30fps and 60fps?
|
||||
- Should preview have a hard minimum frame rate, or be fully opportunistic under playout pressure?
|
||||
- Should synchronous readback fallback be disabled automatically after repeated late/drop pressure?
|
||||
|
||||
## Short Version
|
||||
|
||||
Phase 7 made playout observable and safer. Phase 7.5 should make it proactive.
|
||||
|
||||
The render side should keep the output queue warm before DeckLink needs the next frame. DeckLink should consume ready frames. Preview and synchronous readback fallback should never quietly steal the budget needed to hit output deadlines.
|
||||
@@ -1,513 +0,0 @@
|
||||
# Phase 7.5 Readback Experiment Log
|
||||
|
||||
This log tracks short readback experiments during the proactive playout timing work.
|
||||
|
||||
The later experiments point to a larger ownership change rather than more local fallback tweaks. The proposed follow-up design is [PHASE_7_7_RENDER_CADENCE_PLAYOUT_DESIGN.md](PHASE_7_7_RENDER_CADENCE_PLAYOUT_DESIGN.md).
|
||||
|
||||
## How To Run
|
||||
|
||||
The default debugger launch keeps the current production path:
|
||||
|
||||
- `Debug LoopThroughWithOpenGLCompositing`
|
||||
- `VST_OUTPUT_READBACK_MODE` unset
|
||||
- mode: `async_pbo`
|
||||
|
||||
Comparison modes are still available:
|
||||
|
||||
- `VST_OUTPUT_READBACK_MODE=async_pbo`
|
||||
- uses the older PBO/fence readback path
|
||||
|
||||
The experiment launches are:
|
||||
|
||||
- `Debug LoopThroughWithOpenGLCompositing - sync readback experiment`
|
||||
- `VST_OUTPUT_READBACK_MODE=sync`
|
||||
- uses direct synchronous `glReadPixels()` every output frame
|
||||
|
||||
- `Debug LoopThroughWithOpenGLCompositing - cached output experiment`
|
||||
- `VST_OUTPUT_READBACK_MODE=cached_only`
|
||||
- uses one bootstrap synchronous readback, then copies the cached output frame without ongoing GPU readback
|
||||
|
||||
The cached-output experiment is not visually correct for live motion. It exists to test whether removing ongoing GPU readback lets the producer fill the ready queue again.
|
||||
|
||||
## Experiment 3: fast_transfer
|
||||
|
||||
Status: removed from active code after hardware sample
|
||||
|
||||
Date: 2026-05-11
|
||||
|
||||
Change:
|
||||
|
||||
- DeckLink output frames are now created with `CreateVideoFrameWithBuffer()`.
|
||||
- Output frame buffers are owned by `PinnedMemoryAllocator`.
|
||||
- `VideoIOOutputFrame` carries a texture-transfer callback.
|
||||
- The test branch changed the default render readback path to try `VideoFrameTransfer::GPUtoCPU` against the output texture for BGRA output.
|
||||
- If fast transfer is unavailable or fails, the code falls back to cached output if present, then synchronous readback as a safety fallback.
|
||||
|
||||
Question:
|
||||
|
||||
Can SDK-style pinned/DVP transfer recover real rendered output timing without the visually-invalid cached-only shortcut?
|
||||
|
||||
Result:
|
||||
|
||||
- The test machine reported `GL_VENDOR=NVIDIA Corporation` and `GL_RENDERER=NVIDIA GeForce RTX 4060 Ti/PCIe/SSE2`.
|
||||
- The DeckLink SDK OpenGL fast-transfer sample gates NVIDIA DVP on `GL_RENDERER` containing `Quadro`.
|
||||
- `GL_AMD_pinned_memory` was also unavailable.
|
||||
- The fast-transfer path was removed from active code to avoid carrying unsupported DVP dependencies while we investigate CPU-frame buffering and render-ahead.
|
||||
|
||||
## Baseline: async_pbo
|
||||
|
||||
Date: 2026-05-11
|
||||
|
||||
Observed while the app was running after adding the async queue split counters.
|
||||
|
||||
Summary:
|
||||
|
||||
- ready queue was pinned at 0 or briefly 1
|
||||
- underrun, zero-depth, late, and dropped counts increased continuously
|
||||
- `renderRequestMs` usually sat around 16-25 ms, with occasional larger spikes
|
||||
- `asyncQueueMs` was mostly explained by `asyncQueueReadPixelsMs`
|
||||
- PBO allocation/orphaning was effectively 0 ms
|
||||
|
||||
Representative samples:
|
||||
|
||||
| readyDepth | renderRequestMs | queueWaitMs | drawMs | mapMs | copyMs | asyncQueueMs | asyncQueueBufferMs | asyncQueueReadPixelsMs |
|
||||
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
||||
| 0 | 24.915 | 3.018 | 0.510 | 0.923 | 0.768 | 9.018 | 0.000 | 9.001 |
|
||||
| 0 | 16.226 | 3.066 | 0.518 | 1.202 | 0.812 | 8.611 | 0.000 | 8.598 |
|
||||
| 0 | 12.134 | 3.796 | 3.579 | 1.378 | 0.690 | 10.323 | 0.000 | 10.311 |
|
||||
| 0 | 17.496 | 2.817 | 0.523 | 1.267 | 1.160 | 9.416 | 0.000 | 9.403 |
|
||||
|
||||
Initial read:
|
||||
|
||||
The main repeated cost is issuing `glReadPixels(..., nullptr)` into the PBO. `glBufferData`, setup, fence creation, fence wait, map, and CPU copy are not large enough to explain the underruns.
|
||||
|
||||
## Experiment 1: sync
|
||||
|
||||
Status: sampled
|
||||
|
||||
Question:
|
||||
|
||||
Does the direct synchronous readback path perform better or worse than the current PBO path on this machine and DeckLink format?
|
||||
|
||||
Expected interpretation:
|
||||
|
||||
- If `syncReadMs` is lower than `asyncQueueReadPixelsMs` and the ready queue improves, the current PBO path is the wrong strategy for this driver/format.
|
||||
- If `syncReadMs` is also high and the ready queue remains empty, any GPU-to-CPU readback in this path is too expensive for the current producer cadence.
|
||||
|
||||
Results:
|
||||
|
||||
Date: 2026-05-11
|
||||
|
||||
Summary:
|
||||
|
||||
- ready queue remained pinned at 0
|
||||
- underrun, zero-depth, late, and dropped counts continued increasing
|
||||
- `asyncQueueMs` and async readback counters were 0, confirming the experiment mode was active
|
||||
- direct `syncReadMs` was generally worse than the baseline PBO `asyncQueueReadPixelsMs`
|
||||
|
||||
Representative samples:
|
||||
|
||||
| readyDepth | renderRequestMs | queueWaitMs | drawMs | syncReadMs | asyncQueueMs | syncFallbackCount |
|
||||
| --- | ---: | ---: | ---: | ---: | ---: | ---: |
|
||||
| 0 | 32.467 | 5.764 | 1.389 | 23.122 | 0.000 | 680 |
|
||||
| 0 | 29.722 | 2.603 | 0.512 | 25.538 | 0.000 | 697 |
|
||||
| 0 | 37.844 | 7.716 | 0.518 | 23.608 | 0.000 | 706 |
|
||||
| 0 | 22.304 | 3.089 | 1.843 | 15.278 | 0.000 | 723 |
|
||||
| 0 | 27.196 | 4.015 | 0.500 | 21.933 | 0.000 | 736 |
|
||||
|
||||
Read:
|
||||
|
||||
Direct synchronous readback does not recover the queue and is slower than the async PBO path on the sampled run. The bottleneck appears to be GPU-to-CPU readback itself, not PBO orphaning or fence handling.
|
||||
|
||||
## Experiment 2: cached_only
|
||||
|
||||
Status: sampled
|
||||
|
||||
Question:
|
||||
|
||||
If ongoing GPU readback is removed after bootstrap, can the producer keep the ready queue above 0?
|
||||
|
||||
Expected interpretation:
|
||||
|
||||
- If ready depth rises and underruns slow or stop, readback is the primary bottleneck.
|
||||
- If ready depth still stays near 0, the bottleneck is elsewhere in scheduling, frame acquisition, queueing, or DeckLink handoff.
|
||||
|
||||
Results:
|
||||
|
||||
Date: 2026-05-11
|
||||
|
||||
User-visible result:
|
||||
|
||||
- DeckLink reported a healthy 5-frame buffer.
|
||||
|
||||
Telemetry summary:
|
||||
|
||||
- `renderRequestMs` dropped to roughly 1-3 ms.
|
||||
- `cachedCopyMs` was usually around 0.8-1.0 ms, with one sampled low value around 0.37 ms.
|
||||
- `asyncQueueMs`, `asyncQueueReadPixelsMs`, `syncReadMs`, fence wait, map, and async copy were 0 after bootstrap.
|
||||
- `syncFallbackCount` stayed at 1, confirming one bootstrap readback.
|
||||
- `cachedFallbackCount` increased continuously, confirming ongoing frames were served from cached CPU memory.
|
||||
- late and dropped counts were 0 during the sampled run.
|
||||
- internal ready queue depth still reported mostly 0-1 even while DeckLink showed a healthy hardware/device buffer.
|
||||
|
||||
Representative samples:
|
||||
|
||||
| readyDepth | renderRequestMs | queueWaitMs | drawMs | cachedCopyMs | asyncQueueMs | syncReadMs | late | dropped |
|
||||
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
||||
| 0 | 1.446 | 0.018 | 0.518 | 0.864 | 0.000 | 0.000 | 0 | 0 |
|
||||
| 0 | 2.586 | 1.089 | 0.514 | 0.829 | 0.000 | 0.000 | 0 | 0 |
|
||||
| 0 | 1.481 | 2.378 | 0.502 | 0.911 | 0.000 | 0.000 | 0 | 0 |
|
||||
| 0 | 0.892 | 0.013 | 0.468 | 0.371 | 0.000 | 0.000 | 0 | 0 |
|
||||
| 1 | 1.398 | 0.019 | 0.483 | 0.819 | 0.000 | 0.000 | 0 | 0 |
|
||||
|
||||
Read:
|
||||
|
||||
Removing ongoing GPU readback recovers output timing immediately. The direct cause of the Phase 7.5 playback collapse is the per-frame GPU-to-CPU readback cost, not DeckLink frame acquisition, output frame end-access, PBO allocation, fence waiting, or CPU copy.
|
||||
|
||||
The internal ready queue depth still being low while DeckLink reports a healthy device buffer suggests the ready queue is acting as a short staging queue rather than the full device playout buffer. For the next fix, prioritize avoiding a blocking readback on every output frame instead of only increasing internal ready queue depth.
|
||||
|
||||
## Experiment 4: BGRA8 pack framebuffer async readback
|
||||
|
||||
Status: sampled
|
||||
|
||||
Date: 2026-05-11
|
||||
|
||||
Change:
|
||||
|
||||
- The output path now packs/blits the final output into a BGRA8-compatible framebuffer before readback.
|
||||
- Async readback reads from the pack framebuffer using `GL_BGRA` / `GL_UNSIGNED_INT_8_8_8_8_REV`.
|
||||
- The deeper async PBO ring remains active.
|
||||
|
||||
Question:
|
||||
|
||||
Does making the GPU output/readback format match the DeckLink BGRA8 scheduling format reduce the driver-side `glReadPixels` stall?
|
||||
|
||||
User-visible result:
|
||||
|
||||
- Long pauses appear to be gone.
|
||||
- Playback still stutters, but the stutters look limited to a few frames rather than multi-second freezes.
|
||||
|
||||
Telemetry summary:
|
||||
|
||||
- Throughput recovered to roughly real time in the sampled window.
|
||||
- Over 5 seconds, the app pushed and popped 305 output frames.
|
||||
- `asyncQueueReadPixelsMs` dropped from the earlier 8-14 ms range to roughly 0.05-0.13 ms in the representative samples.
|
||||
- `renderMs` usually sat around 2-5 ms in the sampled burst.
|
||||
- Late and dropped frame counts did not increase during the 5 second delta sample.
|
||||
- The ready queue still repeatedly touched 0 and accumulated underruns, which matches the remaining short stutters.
|
||||
|
||||
Representative samples:
|
||||
|
||||
| readyDepth | renderMs | smoothedRenderMs | drawMs | mapMs | copyMs | asyncQueueReadPixelsMs | queueWaitMs |
|
||||
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
||||
| 0 | 4.855 | 9.494 | 0.570 | 0.234 | 0.822 | 0.128 | 0.026 |
|
||||
| 0 | 1.957 | 9.041 | 0.468 | 0.139 | 0.604 | 0.048 | 0.016 |
|
||||
| 0 | 3.366 | 5.879 | 0.513 | 1.166 | 0.692 | 0.129 | 0.022 |
|
||||
| 0 | 5.208 | 6.492 | 2.209 | 1.358 | 0.714 | 0.090 | 0.061 |
|
||||
| 0 | 2.957 | 8.852 | 0.537 | 1.041 | 0.547 | 0.087 | 0.040 |
|
||||
|
||||
Five-second delta:
|
||||
|
||||
| pushed | popped | ready underruns | zero-depth samples | late delta | dropped delta | scheduled lead |
|
||||
| ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
||||
| 305 | 305 | 129 | 671 | 0 | 0 | 20 |
|
||||
|
||||
Read:
|
||||
|
||||
The main readback stall appears to have been the previous format/path combination, not unavoidable BGRA8 bandwidth. The remaining problem now looks like cadence and buffering: the producer can average real-time throughput again, but the ready queue still runs empty often enough to create visible short stutters.
|
||||
|
||||
## Experiment 5: producer burst-fill ready queue
|
||||
|
||||
Status: sampled
|
||||
|
||||
Date: 2026-05-12
|
||||
|
||||
Change:
|
||||
|
||||
- The output producer now honors `OutputProductionDecision::requestedFrames` instead of always producing one frame per wake.
|
||||
- The producer no longer applies its wake-interval throttle while the ready queue is below target depth.
|
||||
- Completion fallback remains conservative; the background producer is responsible for building the cushion after immediate scheduling.
|
||||
|
||||
Question:
|
||||
|
||||
Now that BGRA8 readback is fast enough on average, can the producer maintain a small ready-frame cushion instead of hovering at zero?
|
||||
|
||||
Expected interpretation:
|
||||
|
||||
- If short stutters reduce and `readyQueue.depth` spends more time above zero, the remaining issue was producer cadence/headroom.
|
||||
- If `readyQueue.depth` still remains pinned near zero, inspect render-thread contention next: preview present, input upload, runtime-event bursts, and live-state composition.
|
||||
- If render spikes increase, burst production may be overloading the shared render thread and should be tuned with a smaller target/depth policy.
|
||||
|
||||
Result:
|
||||
|
||||
- User-visible playback looked about the same.
|
||||
- DeckLink reported a healthier 10-frame buffer.
|
||||
- The app ready queue now briefly reaches 1-3, but still often drains to 0.
|
||||
- No late, dropped, flushed, async-miss, or cached-fallback deltas were observed in the 8-second sample.
|
||||
- Readback remained fast.
|
||||
|
||||
Representative samples:
|
||||
|
||||
| readyDepth | renderMs | smoothedRenderMs | drawMs | mapMs | copyMs | asyncQueueReadPixelsMs | queueWaitMs |
|
||||
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
||||
| 0 | 4.756 | 10.135 | 0.502 | 0.186 | 0.603 | 0.088 | 0.032 |
|
||||
| 1 | 5.135 | 6.968 | 0.730 | 1.269 | 0.772 | 0.088 | 0.073 |
|
||||
| 1 | 3.578 | 6.821 | 0.702 | 1.247 | 0.618 | 0.097 | 0.103 |
|
||||
| 1 | 6.733 | 7.996 | 0.537 | 0.952 | 0.694 | 0.082 | 1.218 |
|
||||
| 0 | 5.276 | 16.782 | 0.550 | 0.119 | 0.766 | 0.090 | 0.016 |
|
||||
|
||||
Eight-second delta:
|
||||
|
||||
| pushed | popped | ready underruns | zero-depth samples | late delta | dropped delta | async misses | cached fallbacks | system scheduled |
|
||||
| ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
||||
| 477 | 478 | 109 | 291 | 0 | 0 | 0 | 0 | 12 |
|
||||
|
||||
Read:
|
||||
|
||||
Burst filling improved device-side buffering but did not remove visible cadence issues. The remaining stutter is less likely to be raw output readback or device starvation. Next candidates are render-thread interference and pacing jitter: preview present, input upload, runtime-event/live-state bursts, and occasional completion/render spikes.
|
||||
|
||||
## Experiment 6: producer work-before-sleep pacing
|
||||
|
||||
Status: ready for hardware test
|
||||
|
||||
Date: 2026-05-12
|
||||
|
||||
Change:
|
||||
|
||||
- The output producer now checks ready-queue pressure before waiting on the producer condition variable.
|
||||
- When production is requested, the producer renders immediately instead of first sleeping for `OutputProducerWakeInterval()`.
|
||||
- The wake interval remains as the idle/no-work sleep path, not as a mandatory pre-production delay.
|
||||
|
||||
Question:
|
||||
|
||||
Does removing the unconditional pre-check sleep let the producer rebuild queue headroom more quickly after a shallow-queue or focus-related disturbance?
|
||||
|
||||
Expected interpretation:
|
||||
|
||||
- If DeckLink buffer depth is steadier and ready-queue underruns slow, the pre-production sleep was part of the cadence loss.
|
||||
- If the result is unchanged, the next likely culprit is render-thread interference rather than producer wake timing.
|
||||
- If CPU usage rises while playback does not improve, the producer may need a more explicit event/pacing model instead of tighter polling.
|
||||
|
||||
## Experiment 7: remove just-in-time render from completion path
|
||||
|
||||
Status: ready for hardware test
|
||||
|
||||
Date: 2026-05-12
|
||||
|
||||
Change:
|
||||
|
||||
- DeckLink completion processing no longer renders an output frame synchronously when the ready queue is empty.
|
||||
- Completion now schedules an already-ready frame if one exists, otherwise it uses the explicit underrun fallback and wakes the producer.
|
||||
- The producer is now solely responsible for rendering ahead and keeping the ready queue fed.
|
||||
|
||||
Question:
|
||||
|
||||
Does removing completion-time rendering make output cadence more stable by keeping DeckLink completion handling short and predictable?
|
||||
|
||||
Expected interpretation:
|
||||
|
||||
- If playback improves or completion pacing spikes shrink, just-in-time rendering in the completion path was harming cadence.
|
||||
- If underrun/fallback counts increase, the producer is still not maintaining enough ready headroom.
|
||||
- If visible output gets worse but telemetry is clearer, implement a real repeat-last-system-frame fallback instead of rendering from completion.
|
||||
|
||||
## Experiment 8: four-frame DeckLink preroll
|
||||
|
||||
Status: ready for hardware test
|
||||
|
||||
Date: 2026-05-12
|
||||
|
||||
Change:
|
||||
|
||||
- `VideoPlayoutPolicy::targetPrerollFrames` is reduced from 12 to 4.
|
||||
- The system-memory frame pool remains larger than the DeckLink preroll so the producer can still build app-side ready headroom.
|
||||
|
||||
Question:
|
||||
|
||||
Can a smaller DeckLink scheduled buffer stay stable now that BGRA8 readback is fast and the producer is responsible for render-ahead?
|
||||
|
||||
Expected interpretation:
|
||||
|
||||
- If DeckLink holds around 4 frames and playback cadence is acceptable, a large 10-12 frame device buffer is not required.
|
||||
- If focus changes or render-thread jitter drain DeckLink below 4, the next work should prioritize real device-buffer telemetry and render-thread interference.
|
||||
- If black flicker continues, it is the explicit underrun fallback being exposed by the no-JIT completion path, not a lack of DeckLink preroll alone.
|
||||
|
||||
## Experiment 9: no steady-state black fallback
|
||||
|
||||
Status: sampled
|
||||
|
||||
Date: 2026-05-12
|
||||
|
||||
Change:
|
||||
|
||||
- Normal DeckLink completion processing no longer schedules a black fallback frame when the app ready queue is empty.
|
||||
- `RenderOutputQueue::TryPop()` still records the app-ready underrun.
|
||||
- The producer is woken and the existing DeckLink scheduled buffer is allowed to carry playback.
|
||||
- The four-frame DeckLink preroll experiment remains active.
|
||||
|
||||
Question:
|
||||
|
||||
Was the visible black flicker caused by treating an app-ready queue miss as immediate device starvation?
|
||||
|
||||
Expected interpretation:
|
||||
|
||||
- If black flicker disappears while app-ready underruns still increase, the fallback was too aggressive and should stay out of the steady-state path.
|
||||
- If DeckLink buffer drains or late/dropped frames increase, we need real device-buffer telemetry and a controlled emergency policy.
|
||||
- If visible stutter remains without black, the next work is cadence attribution: preview present, input upload, render-thread priority, and actual DeckLink buffered-frame count.
|
||||
|
||||
Result:
|
||||
|
||||
- Playback was smooth briefly, then froze once the DeckLink buffer reached 0.
|
||||
- The buffer did not refill.
|
||||
|
||||
Read:
|
||||
|
||||
Removing the black fallback exposed another completion-driven assumption. The producer could render into the app ready queue, but scheduling still happened only from completion processing. Once the scheduled DeckLink buffer reached 0, completions stopped, so no later trigger scheduled the producer's ready frames.
|
||||
|
||||
## Experiment 10: producer-side scheduling
|
||||
|
||||
Status: sampled
|
||||
|
||||
Date: 2026-05-12
|
||||
|
||||
Change:
|
||||
|
||||
- The producer now schedules the frames it produces instead of waiting for a future DeckLink completion to schedule them.
|
||||
- A dedicated output scheduling mutex serializes scheduling calls from the producer and completion worker.
|
||||
- The four-frame DeckLink preroll and no steady-state black fallback experiments remain active.
|
||||
|
||||
Question:
|
||||
|
||||
Can the producer maintain the four-frame DeckLink buffer without relying on completion-time rendering or black fallback insertion?
|
||||
|
||||
Expected interpretation:
|
||||
|
||||
- If the buffer refills and playback no longer freezes, producer-side scheduling is required for a real proactive playout model.
|
||||
- If black flicker is gone but stutter remains, focus on render-thread jitter and actual device-buffer telemetry.
|
||||
- If the buffer overfills or scheduling timing becomes odd, add real DeckLink buffered-frame telemetry and schedule only up to a measured target.
|
||||
|
||||
Result:
|
||||
|
||||
- The DeckLink buffer stayed full.
|
||||
- Playback had a low-framerate look.
|
||||
- Over a 6-second sample, `pushedDelta` and `poppedDelta` were 310, but `underrunDelta` was also 310.
|
||||
- Late and dropped counts increased.
|
||||
- Synthetic scheduled lead grew very large, indicating producer-side scheduling was running too far ahead of the intended four-frame cushion.
|
||||
|
||||
Read:
|
||||
|
||||
Producer-side scheduling is required, but it must be capped by a real scheduling target. Scheduling every produced frame overfeeds the scheduler timeline and can produce odd cadence even when the DeckLink buffer appears full.
|
||||
|
||||
## Experiment 11: cap producer scheduling to preroll target
|
||||
|
||||
Status: sampled
|
||||
|
||||
Date: 2026-05-12
|
||||
|
||||
Change:
|
||||
|
||||
- The producer still renders proactively.
|
||||
- After production, it schedules ready frames only until the system-memory scheduled count reaches `VideoPlayoutPolicy::targetPrerollFrames`.
|
||||
- With the current experiment settings, that target remains four frames.
|
||||
|
||||
Question:
|
||||
|
||||
Can producer-side scheduling keep the four-frame buffer fed without running hundreds of frames ahead in scheduler time?
|
||||
|
||||
Expected interpretation:
|
||||
|
||||
- If the low-framerate look disappears and the buffer stays around four, producer scheduling needed a cap.
|
||||
- If the buffer drains, the cap needs actual DeckLink `GetBufferedVideoFrameCount()` telemetry rather than system-memory scheduled-count approximation.
|
||||
- If stutter remains with sane lead, investigate render-thread interference next.
|
||||
|
||||
Result:
|
||||
|
||||
- Playback still had the low-framerate look.
|
||||
- The system-memory scheduled count held at the four-frame target.
|
||||
- Synthetic scheduled lead still grew, with scheduled frame index advancing faster than completed frame index.
|
||||
|
||||
Read:
|
||||
|
||||
The cap was active, but completion and producer were both still scheduling ready frames. The result was still over-scheduling relative to completions, even though the system-memory scheduled count stayed at four.
|
||||
|
||||
## Experiment 12: producer owns steady-state scheduling
|
||||
|
||||
Status: sampled
|
||||
|
||||
Date: 2026-05-12
|
||||
|
||||
Change:
|
||||
|
||||
- Completion processing now releases completed frames, records telemetry, and wakes the producer only.
|
||||
- Completion no longer schedules from the ready queue during steady state.
|
||||
- Producer-side scheduling remains capped to `targetPrerollFrames`.
|
||||
|
||||
Question:
|
||||
|
||||
Does having a single steady-state scheduler stop the schedule timeline from running ahead and recover normal cadence?
|
||||
|
||||
Expected interpretation:
|
||||
|
||||
- If scheduled lead stops growing and playback cadence improves, duplicate completion/producer scheduling was the low-framerate cause.
|
||||
- If the buffer drains, the producer wake/schedule loop is still not responsive enough.
|
||||
- If lead still grows, inspect `VideoPlayoutScheduler` catch-up accounting next.
|
||||
|
||||
Result:
|
||||
|
||||
- Playback froze on startup.
|
||||
- Telemetry showed rendered ready frames in the app ready queue, but zero system-memory frames scheduled.
|
||||
|
||||
Read:
|
||||
|
||||
Removing completion-side scheduling exposed another producer-loop gap. The producer only scheduled immediately after producing frames. Once the ready queue reached its max depth, production stopped, and the already-ready frames were never handed to DeckLink.
|
||||
|
||||
## Experiment 13: producer top-up scheduling before production
|
||||
|
||||
Status: pending hardware build
|
||||
|
||||
Date: 2026-05-12
|
||||
|
||||
Change:
|
||||
|
||||
- The producer now attempts to top up DeckLink scheduling from already-ready frames before deciding whether to render more frames.
|
||||
- The producer also tops up after successful production.
|
||||
- Completion remains release/record/wake only.
|
||||
|
||||
Question:
|
||||
|
||||
Can the producer own steady-state scheduling without freezing when ready frames already exist?
|
||||
|
||||
Expected interpretation:
|
||||
|
||||
- If startup no longer freezes and the four-frame buffer stays stable, the producer needed an explicit schedule-before-produce pass.
|
||||
- If cadence is still wrong, the next target is scheduler timeline accounting or actual DeckLink buffered-frame telemetry.
|
||||
|
||||
Result:
|
||||
|
||||
- Playback alternated between smooth playback and freezes.
|
||||
- The app ready queue was no longer starving; it held around 3-4 frames and had no new ready underruns in the sampled delta.
|
||||
- Late and dropped counts increased.
|
||||
- `scheduledIndexDelta` was much larger than `completedIndexDelta`, even with producer scheduling capped.
|
||||
|
||||
Read:
|
||||
|
||||
The proactive producer now feeds the app queue, but `VideoPlayoutScheduler` catch-up accounting still advances scheduled stream time on late/drop recovery. That creates timeline gaps and produces the smooth/freeze/smooth cadence.
|
||||
|
||||
## Experiment 14: disable late/drop catch-up skipping
|
||||
|
||||
Status: pending hardware build
|
||||
|
||||
Date: 2026-05-12
|
||||
|
||||
Change:
|
||||
|
||||
- `VideoPlayoutPolicy::lateOrDropCatchUpFrames` is set to 0.
|
||||
- Late/drop results should still be reported, but the scheduler should not advance `mScheduledFrameIndex` by extra catch-up frames.
|
||||
|
||||
Question:
|
||||
|
||||
Does removing schedule-time skipping stop the smooth/freeze cadence now that the producer owns steady-state scheduling?
|
||||
|
||||
Expected interpretation:
|
||||
|
||||
- If `scheduledIndexDelta` closely matches actual scheduled/completed frame flow and playback smooths out, catch-up skipping was harmful in proactive mode.
|
||||
- If late/dropped counts still climb without catch-up, inspect actual DeckLink buffered-frame count and render-thread interference.
|
||||
@@ -1,359 +0,0 @@
|
||||
# Phase 7.6: System-Memory Playout Buffer Design
|
||||
|
||||
## Status
|
||||
|
||||
In progress.
|
||||
|
||||
Follow-up direction:
|
||||
|
||||
- Phase 7.6 proved the BGRA8 system-memory path and exposed the need for a larger cadence/scheduler split.
|
||||
- Continue the broader rewrite in [PHASE_7_7_RENDER_CADENCE_PLAYOUT_DESIGN.md](PHASE_7_7_RENDER_CADENCE_PLAYOUT_DESIGN.md).
|
||||
|
||||
Implemented so far:
|
||||
|
||||
- BGRA8 `SystemOutputFramePool` with non-GL tests
|
||||
- render/readback production now writes into app-owned system-memory slots
|
||||
- DeckLink output scheduling can wrap system-memory slots with `CreateVideoFrameWithBuffer()`
|
||||
- DeckLink completion callbacks release scheduled system-memory slots
|
||||
- ready-queue discard paths release owned frames instead of leaking slots
|
||||
- telemetry scaffolding exposes free, ready, and scheduled system-memory frame counts
|
||||
- async PBO readback is now a deeper pipeline by default and ordinary misses no longer flush queued readbacks
|
||||
- the output producer now honors requested burst production when the ready queue is below target instead of producing only one frame per wake
|
||||
|
||||
Still to verify/tune on hardware:
|
||||
|
||||
- sustained DeckLink buffer depth
|
||||
- frame age at schedule/completion
|
||||
- repeat/underrun policy behavior under real stalls
|
||||
- whether deeper async readback reduces sawtooth buffer drain
|
||||
- whether BGRA8 bandwidth is sufficient before considering v210
|
||||
- whether burst filling keeps `readyQueue.depth` above zero and reduces the remaining short stutters
|
||||
|
||||
Phase 7.5 isolated the current playout timing problem around output readback and DeckLink scheduling pressure. The fast-transfer path from the DeckLink OpenGL sample is not available on the current test GPU, so the next direction is to make the normal path behave more like broadcast playout systems: render ahead, read back into system-memory frame buffers, and let DeckLink consume already-complete frames.
|
||||
|
||||
This phase is not a move away from rendering every frame. It is a move away from making DeckLink wait for each frame to be rendered and read back at the moment it needs to be scheduled.
|
||||
|
||||
## SDK Finding: RGBA8 Is Not Required
|
||||
|
||||
DeckLink output frames do not have to be RGBA8/BGRA8.
|
||||
|
||||
The SDK accepts a `BMDPixelFormat` when creating output frames. The available formats include:
|
||||
|
||||
- `bmdFormat8BitYUV`
|
||||
- `bmdFormat10BitYUV`
|
||||
- `bmdFormat10BitYUVA`
|
||||
- `bmdFormat8BitARGB`
|
||||
- `bmdFormat8BitBGRA`
|
||||
- `bmdFormat10BitRGB`
|
||||
- `bmdFormat12BitRGB`
|
||||
|
||||
The SDK samples also use non-RGBA output paths:
|
||||
|
||||
- `FilePlayback` converts unsupported source frames to `bmdFormat10BitYUV`
|
||||
- `PlaybackStills` uses `bmdFormat10BitYUV`
|
||||
- `InputLoopThrough` handles `bmdFormat10BitYUV` and related formats
|
||||
- `SignalGen` exposes 8-bit YUV, 10-bit YUV, 8-bit RGB, and 10-bit RGB choices
|
||||
- `OpenGLOutput` uses BGRA for that sample path, but that is not a DeckLink API requirement
|
||||
|
||||
The current app already has partial support for this direction:
|
||||
|
||||
- `DeckLinkSession` probes support for `bmdFormat10BitYUV` and `bmdFormat10BitYUVA`
|
||||
- `VideoIOPixelFormat::V210` maps to `bmdFormat10BitYUV`
|
||||
- `VideoIOPixelFormat::Yuva10` maps to `bmdFormat10BitYUVA`
|
||||
- `RenderEngine` has a 10-bit output packing path
|
||||
- row-byte calculation already distinguishes `bmdFormat10BitYUV`, `bmdFormat10BitYUVA`, and BGRA-style formats
|
||||
|
||||
So yes: we can pack before readback later if bandwidth proves to be the remaining bottleneck. For the first Phase 7.6 implementation, keep BGRA8 as the active output format and focus on the larger architectural problem: DeckLink should schedule from completed system-memory frames instead of waiting on the current render/readback operation.
|
||||
|
||||
## Goal
|
||||
|
||||
Create a buffered system-memory playout path:
|
||||
|
||||
- render every output frame
|
||||
- keep BGRA8 as the first output/readback format
|
||||
- read back into reusable CPU/system-memory frame slots
|
||||
- keep a small queue of completed frames ahead of DeckLink
|
||||
- schedule DeckLink from completed frames rather than from in-progress rendering
|
||||
- preserve telemetry so every experiment can be compared against Phase 7.5
|
||||
|
||||
## Non-Goals
|
||||
|
||||
- Do not reintroduce NVIDIA DVP or AMD pinned-memory as a required path.
|
||||
- Do not hide dropped, repeated, or late frames.
|
||||
- Do not make cached-output playback the default production behavior.
|
||||
- Do not add a large latency buffer without making that latency explicit.
|
||||
- Do not rewrite shader/effect evaluation unless profiling proves it is the bottleneck.
|
||||
- Do not make v210/YUV packing part of the first implementation unless BGRA8 buffering is proven insufficient.
|
||||
|
||||
## Architecture
|
||||
|
||||
### Current Problem Shape
|
||||
|
||||
The current path is still too close to:
|
||||
|
||||
1. DeckLink needs a frame.
|
||||
2. App renders or finalizes a frame.
|
||||
3. App reads back from GL.
|
||||
4. App schedules the frame.
|
||||
|
||||
That can work only if every step reliably fits inside the frame budget. When readback stalls or scheduling is delayed, DeckLink sees a shallow buffer and playback freezes.
|
||||
|
||||
### Target Shape
|
||||
|
||||
The target path is:
|
||||
|
||||
1. Render producer prepares future frames.
|
||||
2. GPU output is read back as BGRA8 into the selected system frame slot.
|
||||
3. Readback fills a free system-memory frame slot.
|
||||
4. Completed slots enter a ready queue.
|
||||
5. DeckLink scheduler consumes ready slots at output cadence.
|
||||
6. Completion callbacks release slots back to the pool.
|
||||
|
||||
This gives the scheduler a small cushion without sacrificing rendered frames.
|
||||
|
||||
## Pixel Format Strategy
|
||||
|
||||
### First Target: BGRA8
|
||||
|
||||
Use BGRA8 as the first serious output target.
|
||||
|
||||
Reasons:
|
||||
|
||||
- it is the path closest to the current renderer
|
||||
- it avoids introducing color-space packing risk while the buffering architecture is still being proven
|
||||
- it keeps alpha/keying behavior easier to reason about
|
||||
- it lets Phase 7.6 isolate scheduling/readback ownership from pixel-format conversion
|
||||
|
||||
Known byte cost at 1920x1080:
|
||||
|
||||
- BGRA8: about 8.29 MB per frame
|
||||
|
||||
That is larger than v210, but the immediate hypothesis is that the freezes come from scheduling coupling and readback stalls, not only raw byte count. Prove or disprove that with the system-memory queue first.
|
||||
|
||||
### Later Target: 10-bit YUV / v210
|
||||
|
||||
Keep v210 available as a later optimization.
|
||||
|
||||
Reasons to revisit it:
|
||||
|
||||
- it is a native DeckLink output format
|
||||
- it can reduce 1920x1080 readback size from about 8.29 MB per BGRA8 frame to about 5.53 MB per v210 frame
|
||||
- it may better match final video I/O expectations for fill-only output
|
||||
|
||||
Do this only after the BGRA8 system-memory queue is measured. If BGRA8 buffering keeps DeckLink healthy, v210 becomes a quality/bandwidth refinement rather than a rescue path.
|
||||
|
||||
### Alpha / Keying
|
||||
|
||||
For the first implementation, BGRA8 remains the default target.
|
||||
|
||||
For alpha/key workflows, `bmdFormat10BitYUVA` may be needed, or key/fill may need to remain split depending on the device mode and keyer configuration. Phase 7.6 should make this explicit rather than assuming one format fits both.
|
||||
|
||||
## Proposed Components
|
||||
|
||||
### `SystemOutputFramePool`
|
||||
|
||||
Owns reusable CPU-side frame slots.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- allocate a fixed number of output slots
|
||||
- expose free slots to the render/readback producer
|
||||
- expose completed slots to the DeckLink scheduler
|
||||
- track slot generation, frame id, frame time, and pixel format
|
||||
- prevent reuse while DeckLink still owns or may read the frame
|
||||
|
||||
### `OutputFrameSlot`
|
||||
|
||||
Represents one CPU/system-memory playout frame.
|
||||
|
||||
Likely contents:
|
||||
|
||||
- pointer to writable frame bytes
|
||||
- row bytes
|
||||
- width and height
|
||||
- `BMDPixelFormat` or app-level equivalent
|
||||
- frame number / stream time
|
||||
- timing metadata
|
||||
- completion state
|
||||
- optional DeckLink frame wrapper
|
||||
|
||||
### `DeckLinkOutputFrameAdapter`
|
||||
|
||||
Bridges app-owned memory to DeckLink output frames.
|
||||
|
||||
Options to evaluate:
|
||||
|
||||
- create DeckLink frames with app-owned buffers where supported by the SDK
|
||||
- keep DeckLink-created frames in the pool and write directly into their bytes
|
||||
- wrap app memory behind a small `IDeckLinkVideoFrame` implementation only if needed
|
||||
|
||||
The simplest production path should avoid an extra CPU copy between app memory and DeckLink memory.
|
||||
|
||||
### `OutputFrameProducer`
|
||||
|
||||
Runs on or is driven by the render thread.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- acquire a free system frame slot
|
||||
- render the next frame
|
||||
- read back BGRA8 into the slot
|
||||
- publish the slot to the ready queue
|
||||
- record readback timings
|
||||
|
||||
### `DeckLinkPlayoutScheduler`
|
||||
|
||||
Consumes completed system frames.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- keep DeckLink scheduled ahead by the configured target depth
|
||||
- schedule from the ready queue
|
||||
- repeat/drop according to explicit policy when the queue is empty or too deep
|
||||
- release frame slots after DeckLink completion callbacks
|
||||
- report buffer depth and scheduling lead
|
||||
|
||||
## Migration Plan
|
||||
|
||||
### Step 1: Make Output Pixel Format Explicit Everywhere
|
||||
|
||||
Current format selection exists, but Phase 7.6 should make it impossible to confuse render texture format, readback format, and DeckLink scheduled format.
|
||||
|
||||
Deliverables:
|
||||
|
||||
- log selected DeckLink output pixel format at startup
|
||||
- expose readback bytes per frame in telemetry
|
||||
- expose whether the frame was BGRA, v210, or YUVA
|
||||
- make BGRA8 the default and first supported system-buffer path
|
||||
|
||||
### Step 2: Introduce the BGRA8 System Frame Pool
|
||||
|
||||
Add a fixed-size pool of BGRA8 system-memory output slots.
|
||||
|
||||
Initial target depth:
|
||||
|
||||
- 3 ready/scheduled frames minimum
|
||||
- 5 frames as the practical DeckLink-health target
|
||||
- configurable for experiments
|
||||
|
||||
The pool should be testable without OpenGL or DeckLink hardware.
|
||||
|
||||
### Step 3: Read Back BGRA8 Into Pool Slots
|
||||
|
||||
Move readback output away from transient buffers and into acquired frame slots.
|
||||
|
||||
The producer must never block DeckLink scheduling while waiting for a free slot if a safe repeat/drop policy can keep playback alive.
|
||||
|
||||
### Step 4: Schedule From Completed Slots
|
||||
|
||||
Change DeckLink scheduling to consume completed system frames.
|
||||
|
||||
DeckLink callbacks should become the point where slots are returned to the pool.
|
||||
|
||||
This is the main behavioral change: scheduling no longer waits for the active render/readback operation.
|
||||
|
||||
### Step 5: Add Playout Policies
|
||||
|
||||
Make underflow and overflow behavior explicit.
|
||||
|
||||
Possible policies:
|
||||
|
||||
- repeat last completed frame on underflow
|
||||
- schedule black on startup only
|
||||
- drop oldest completed frame if the producer gets too far ahead
|
||||
- preserve most recent frame for live-control responsiveness
|
||||
|
||||
The default should favor stable output cadence and visible telemetry over silent correctness guesses.
|
||||
|
||||
### Step 6: Tune Buffer Depth and Latency
|
||||
|
||||
Measure:
|
||||
|
||||
- render time
|
||||
- readback time
|
||||
- CPU copy time, if any
|
||||
- ready queue depth
|
||||
- scheduled queue depth
|
||||
- frame age at schedule time
|
||||
- frame age at display callback
|
||||
- repeats, drops, and underruns
|
||||
|
||||
Then choose a default buffer depth that keeps DeckLink healthy without adding unnecessary latency.
|
||||
|
||||
### Step 7: Optional v210 Experiment
|
||||
|
||||
Only after BGRA8 buffering has been measured, add a runtime option that forces:
|
||||
|
||||
- GPU pack to v210
|
||||
- readback of packed v210 bytes
|
||||
- DeckLink scheduling as `bmdFormat10BitYUV`
|
||||
|
||||
This should be compared against the completed BGRA8 system-memory path, not against the older coupled path.
|
||||
|
||||
## Telemetry
|
||||
|
||||
Keep the Phase 7.5 counters and add:
|
||||
|
||||
- `outputPixelFormat`
|
||||
- `outputReadbackBytes`
|
||||
- `outputPackMode`
|
||||
- `systemFramePoolFree`
|
||||
- `systemFramePoolReady`
|
||||
- `systemFramePoolScheduled`
|
||||
- `systemFrameAgeAtScheduleMs`
|
||||
- `systemFrameAgeAtCompletionMs`
|
||||
- `systemFrameUnderruns`
|
||||
- `systemFrameRepeats`
|
||||
- `systemFrameDrops`
|
||||
- `deckLinkScheduleLeadFrames`
|
||||
- `deckLinkScheduleLeadMs`
|
||||
|
||||
Telemetry scaffolding can land before the frame pool itself. Until `SystemOutputFramePool` exists, these fields should remain producer-owned gauges/counters with default zero values in `HealthTelemetry`; they should not be inferred from the existing render-ready queue or DeckLink pool because those are adjacent concepts, not the final free/ready/scheduled system-memory slot model.
|
||||
|
||||
Existing counters that should remain useful:
|
||||
|
||||
- render frame time
|
||||
- async queue time
|
||||
- readback timing
|
||||
- output queue depth
|
||||
- displayed late count
|
||||
- dropped count
|
||||
- DeckLink buffered frame count
|
||||
|
||||
## Tests
|
||||
|
||||
Add non-GL tests for:
|
||||
|
||||
- frame pool acquire/publish/consume/release
|
||||
- slots are not reused while scheduled
|
||||
- underflow repeats the last completed frame when configured
|
||||
- overflow drops according to policy
|
||||
- row-byte and byte-size calculation for BGRA8 first, with v210 and YUVA covered when those modes are enabled
|
||||
- scheduler consumes only completed frames
|
||||
- completion callback releases the expected slot
|
||||
|
||||
Hardware/manual tests:
|
||||
|
||||
- BGRA8 system-buffered output works
|
||||
- DeckLink buffer depth stays healthy
|
||||
- no black-frame startup longer than configured preroll
|
||||
- shutdown drains or releases scheduled slots safely
|
||||
|
||||
## Risks
|
||||
|
||||
- DeckLink frame ownership rules may force one extra copy if app-owned buffers are not accepted in the exact path we use.
|
||||
- Buffering improves cadence but adds latency.
|
||||
- If GPU readback itself remains slower than real time, buffering only delays the underflow.
|
||||
- v210 remains a future optimization and may still carry color-space/keying risk when introduced.
|
||||
|
||||
## Exit Criteria
|
||||
|
||||
Phase 7.6 is complete when:
|
||||
|
||||
- DeckLink output format is explicit and logged
|
||||
- BGRA8 system-memory output slots are the default playout path
|
||||
- completed system-memory frames are queued ahead of DeckLink scheduling
|
||||
- DeckLink callbacks release/recycle frame slots
|
||||
- ready/scheduled buffer depth is visible in telemetry
|
||||
- underflow/repeat/drop behavior is explicit and tested
|
||||
- the app can sustain a healthy DeckLink buffer without using cached-output playback
|
||||
@@ -1,492 +0,0 @@
|
||||
# Phase 7.7: Render Cadence And Playout Separation Design
|
||||
|
||||
## Status
|
||||
|
||||
In progress.
|
||||
|
||||
Implemented so far:
|
||||
|
||||
- real DeckLink buffered-frame telemetry is exposed separately from synthetic scheduler lead
|
||||
- pure `RenderCadenceController` exists with non-GL tests
|
||||
- `SystemOutputFramePool` now exposes the Phase 7.7 state vocabulary: `Free`, `Rendering`, `Completed`, `Scheduled`
|
||||
- the output producer now uses `RenderCadenceController` to render one output frame per cadence tick
|
||||
- DeckLink scheduling remains a separate top-up pass capped by the configured preroll target
|
||||
|
||||
Phase 7.5 and 7.6 proved useful pieces individually:
|
||||
|
||||
- BGRA8 pack/readback can be fast enough on the current test machine.
|
||||
- System-memory frame slots can be wrapped for DeckLink scheduling.
|
||||
- A producer can keep frames ready and keep a small scheduled buffer filled.
|
||||
|
||||
But the experiments also showed that the current hybrid ownership model is fragile:
|
||||
|
||||
- completion-driven rendering caused app-ready starvation
|
||||
- completion-time black fallback caused visible black flicker
|
||||
- producer-side scheduling without a cadence target overfed the schedule timeline
|
||||
- capping scheduled count helped, but completion and producer scheduling fought each other
|
||||
- making completion passive exposed startup and scheduling-trigger gaps
|
||||
- late/drop catch-up skipping created smooth/freeze/smooth cadence
|
||||
|
||||
The lesson is that the app needs a larger architectural split, not more local recovery branches.
|
||||
|
||||
## Goal
|
||||
|
||||
Make the output path behave like two cooperating real-time systems:
|
||||
|
||||
```text
|
||||
Render cadence thread
|
||||
renders at the selected output cadence, for example 59.94 fps
|
||||
writes completed frames into system-memory slots
|
||||
|
||||
DeckLink playout scheduler
|
||||
keeps the device scheduled buffer topped up
|
||||
consumes completed system-memory frames
|
||||
never asks rendering to happen synchronously
|
||||
```
|
||||
|
||||
The system-memory frame buffer becomes the contract between render timing and device timing.
|
||||
|
||||
Core principle:
|
||||
|
||||
- The render cadence should be stable and boring.
|
||||
- If the selected output mode is 59.94 fps, the render producer should attempt to render at 59.94 fps.
|
||||
- It should not speed up just because the DeckLink buffer is empty.
|
||||
- It should not slow down because DeckLink is full or because completed frames have not drained.
|
||||
- Completed-but-unscheduled frames are a latest-N cache. Old completed frames may be dropped/recycled to keep rendering at cadence.
|
||||
- Scheduled frames are protected until DeckLink completes them.
|
||||
- The only normal reason for the render cadence to deviate is that rendering/GPU work itself overruns the frame budget.
|
||||
|
||||
## Non-Goals
|
||||
|
||||
- Do not hide failure by repeating frames as the primary strategy.
|
||||
- Do not make DeckLink completion callbacks render frames.
|
||||
- Do not use synthetic schedule-index catch-up as normal recovery.
|
||||
- Do not change shader semantics or live-state semantics.
|
||||
- Do not require v210/YUV packing in the first implementation.
|
||||
- Do not pursue DVP/pinned-memory fast transfer as the main path on unsupported hardware.
|
||||
|
||||
## Target Architecture
|
||||
|
||||
### Current Problem Shape
|
||||
|
||||
The current Phase 7.5/7.6 implementation still has too many timing authorities:
|
||||
|
||||
- DeckLink completion callbacks release frames and influence scheduling
|
||||
- the producer renders based on queue pressure
|
||||
- the producer also schedules some frames
|
||||
- `VideoPlayoutScheduler` advances synthetic stream-time indexes
|
||||
- fallback behavior can schedule black when the app-ready queue is briefly empty
|
||||
|
||||
That means the system can be full and still look wrong, because "full" is not tied to one clear cadence owner.
|
||||
|
||||
### Target Shape
|
||||
|
||||
```text
|
||||
Startup / warmup
|
||||
render cadence starts first
|
||||
render thread produces warmup frames at the selected cadence
|
||||
completed system-memory queue reaches warmup target
|
||||
DeckLink preroll is scheduled from completed frames
|
||||
DeckLink playback starts with a filled buffer
|
||||
|
||||
Steady state
|
||||
RenderCadenceController
|
||||
owns output frame tick: frame 0, 1, 2...
|
||||
owns render target time
|
||||
asks RenderEngine to render frame N
|
||||
publishes completed frame N into PlayoutFrameStore
|
||||
|
||||
PlayoutFrameStore
|
||||
owns free / rendering / completed / scheduled slots
|
||||
tracks frame number, render time, completion time, and schedule state
|
||||
exposes latest completed frames to DeckLink scheduler
|
||||
may drop/recycle oldest unscheduled completed frames when render cadence needs space
|
||||
|
||||
DeckLinkPlayoutScheduler
|
||||
owns DeckLink schedule time
|
||||
tops up device buffered frames to target depth
|
||||
consumes completed frames only
|
||||
releases scheduled slots on completion callbacks
|
||||
|
||||
DeckLink completion callback
|
||||
releases completed slots
|
||||
records result and device timing
|
||||
wakes scheduler
|
||||
does not render
|
||||
```
|
||||
|
||||
## Cadence Model
|
||||
|
||||
The render side should be time-driven, not completion-driven.
|
||||
|
||||
For a 59.94 fps mode:
|
||||
|
||||
```text
|
||||
frameDuration = 1001 / 60000 seconds
|
||||
nextRenderTime = now
|
||||
|
||||
loop:
|
||||
wait until nextRenderTime, or run immediately if behind
|
||||
render frameIndex for nextRenderTime
|
||||
read back into free system-memory slot
|
||||
publish completed slot
|
||||
frameIndex += 1
|
||||
nextRenderTime += frameDuration
|
||||
```
|
||||
|
||||
Rules:
|
||||
|
||||
- If the render thread is early, it waits/yields.
|
||||
- If it is slightly late, it renders the next frame immediately and records lateness.
|
||||
- If it is badly late because render/GPU work overran the frame budget, policy may skip render ticks before rendering the newest frame.
|
||||
- Skipping render ticks is an overrun policy, not a buffer-fill strategy.
|
||||
- DeckLink schedule time should remain continuous unless a deliberate device recovery policy says otherwise.
|
||||
|
||||
Non-rule:
|
||||
|
||||
- The render producer must not render faster than the selected cadence to refill DeckLink.
|
||||
- DeckLink should start only after warmup/preroll has filled enough completed frames.
|
||||
- If the DeckLink buffer drains in steady state, that is a real timing failure to measure, not a signal for the render thread to sprint.
|
||||
|
||||
## Buffer Model
|
||||
|
||||
Use a fixed system-memory slot pool.
|
||||
|
||||
The completed portion of the pool is not a strict consume-before-render queue. It is a latest-N rendered-frame cache:
|
||||
|
||||
- render cadence writes one frame per selected output tick
|
||||
- if completed-but-unscheduled frames are full, the oldest completed frame is disposable
|
||||
- DeckLink scheduling consumes from the completed cache when it needs frames
|
||||
- frames already scheduled to DeckLink are never recycled until completion
|
||||
- if all slots are scheduled/in flight, cadence may miss because there is genuinely no safe system-memory target
|
||||
|
||||
Suggested starting values:
|
||||
|
||||
- completed-frame target: 2-4 frames
|
||||
- DeckLink scheduled target: 4 frames for experiments
|
||||
- total system slots: scheduled target + completed target + rendering spare + safety spare
|
||||
|
||||
For example:
|
||||
|
||||
```text
|
||||
scheduled target: 4
|
||||
completed target: 3
|
||||
rendering/spare: 2
|
||||
total slots: 9
|
||||
```
|
||||
|
||||
Slot states:
|
||||
|
||||
- `Free`
|
||||
- `Rendering`
|
||||
- `Completed`
|
||||
- `Scheduled`
|
||||
|
||||
Each slot should carry:
|
||||
|
||||
- frame index
|
||||
- render target timestamp
|
||||
- render completion timestamp
|
||||
- pixel format
|
||||
- row bytes and size
|
||||
- schedule timestamp/index when scheduled
|
||||
- completion result when released
|
||||
|
||||
## Scheduling Model
|
||||
|
||||
The DeckLink scheduler should top up to a target device depth.
|
||||
|
||||
```text
|
||||
on scheduler wake:
|
||||
while actualDeckLinkBufferedFrames < targetScheduledFrames:
|
||||
frame = completedStore.popOldestCompleted()
|
||||
if no frame:
|
||||
record completed-frame underrun
|
||||
break
|
||||
schedule frame at next continuous DeckLink stream time
|
||||
```
|
||||
|
||||
Important:
|
||||
|
||||
- Use DeckLink `GetBufferedVideoFrameCount()` where available.
|
||||
- Keep synthetic scheduled/completed indexes as diagnostics only.
|
||||
- Do not infer device buffer depth from `mScheduledFrameIndex - mCompletedFrameIndex`.
|
||||
- Do not schedule black because the app completed queue is momentarily empty while the device still has frames buffered.
|
||||
- Use black only before the first valid frame or in explicit emergency fallback.
|
||||
|
||||
## Thread Ownership
|
||||
|
||||
### Render Cadence Thread
|
||||
|
||||
Owns:
|
||||
|
||||
- render tick timing
|
||||
- acquiring a free system-memory slot
|
||||
- requesting render-thread output render/readback
|
||||
- publishing completed frames
|
||||
|
||||
Does not own:
|
||||
|
||||
- DeckLink schedule time
|
||||
- completion callback processing
|
||||
- fallback black scheduling
|
||||
|
||||
### RenderEngine Render Thread
|
||||
|
||||
Owns:
|
||||
|
||||
- GL context
|
||||
- input upload
|
||||
- shader rendering
|
||||
- output packing/readback
|
||||
- preview present when allowed
|
||||
|
||||
Output render work should have priority over preview/screenshot work.
|
||||
|
||||
### DeckLink Scheduler Thread
|
||||
|
||||
Owns:
|
||||
|
||||
- schedule top-up policy
|
||||
- DeckLink `ScheduleVideoFrame`
|
||||
- device buffered-frame telemetry
|
||||
- consuming completed frames
|
||||
|
||||
Does not own:
|
||||
|
||||
- rendering a missing frame
|
||||
- running live-state composition directly
|
||||
|
||||
### Completion Callback / Worker
|
||||
|
||||
Owns:
|
||||
|
||||
- releasing scheduled system slots
|
||||
- recording completion result
|
||||
- waking scheduler and render cadence loops
|
||||
|
||||
Does not own:
|
||||
|
||||
- rendering
|
||||
- scheduling fallback black during normal steady state
|
||||
|
||||
## What Happens Under Stress
|
||||
|
||||
### Render Is Temporarily Late
|
||||
|
||||
- Completed-frame queue drains.
|
||||
- DeckLink scheduled buffer drains.
|
||||
- Telemetry shows render lateness and completed queue depth drop.
|
||||
- If render catches up before device buffer reaches zero, output remains smooth.
|
||||
|
||||
### Render Cannot Sustain Cadence
|
||||
|
||||
- Completed-frame queue stays low.
|
||||
- DeckLink buffer trends down.
|
||||
- Late/drop telemetry increases.
|
||||
- Policy may choose to skip render ticks, lower preview load, or enter degraded state.
|
||||
|
||||
### DeckLink Timing Jitters
|
||||
|
||||
- Scheduler tops up based on actual device buffered count.
|
||||
- Render cadence continues independently.
|
||||
- System-memory buffer absorbs short mismatch.
|
||||
|
||||
### UI Loses Focus
|
||||
|
||||
- Render cadence should continue.
|
||||
- Preview present may be disabled or deprioritized.
|
||||
- Output/render threads may need elevated priority.
|
||||
- Device buffer telemetry should reveal whether Windows focus changes affect render cadence or only preview.
|
||||
|
||||
## Migration Plan
|
||||
|
||||
### Step 1: Add Real DeckLink Buffer Telemetry
|
||||
|
||||
Before more scheduling changes, measure the real device buffer.
|
||||
|
||||
Deliverables:
|
||||
|
||||
- [x] call DeckLink `GetBufferedVideoFrameCount()` after schedule/completion where available
|
||||
- [x] expose `actualDeckLinkBufferedFrames`
|
||||
- [x] keep `scheduledLeadFrames` but label it synthetic/internal
|
||||
- [x] record schedule-call duration and failures
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- [x] runtime telemetry distinguishes app completed queue, system scheduled slots, synthetic lead, and actual DeckLink buffer depth
|
||||
|
||||
### Step 2: Rename Existing Queues To Match Their Roles
|
||||
|
||||
Clarify vocabulary before rewriting behavior.
|
||||
|
||||
Deliverables:
|
||||
|
||||
- rename or document `RenderOutputQueue` as completed/unscheduled frame queue
|
||||
- distinguish completed-frame depth from device scheduled depth
|
||||
- update telemetry labels where possible
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- logs no longer imply `readyQueue.depth == 0` means DeckLink starvation
|
||||
|
||||
### Step 3: Introduce `RenderCadenceController`
|
||||
|
||||
Add a pure timing helper first.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- [x] compute next render tick
|
||||
- [x] track frame duration
|
||||
- [x] report early/late/drift
|
||||
- [x] decide whether to render, wait, or skip render ticks
|
||||
|
||||
Tests:
|
||||
|
||||
- [x] exact cadence advances
|
||||
- [x] late ticks are measured
|
||||
- [x] large lateness can skip according to policy
|
||||
- [x] no dependency on GL or DeckLink
|
||||
|
||||
### Step 4: Move Output Production To Cadence Ticks
|
||||
|
||||
Replace queue-pressure-only production with cadence-driven production.
|
||||
|
||||
Initial behavior:
|
||||
|
||||
- [x] render at selected output cadence
|
||||
- [x] produce into system-memory slots
|
||||
- [x] publish completed frames
|
||||
- [x] recycle/drop oldest unscheduled completed frames when cadence needs a slot
|
||||
- [ ] only wait when every safe slot is scheduled/in flight
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- output rendering continues without DeckLink completions
|
||||
- output rendering does not schedule DeckLink directly
|
||||
- completed-frame buffering behaves as latest-N, not consume-before-render
|
||||
|
||||
### Step 4a: Add Warmup Before DeckLink Playback
|
||||
|
||||
DeckLink output should not start consuming before the render cadence has prepared an initial cushion.
|
||||
|
||||
Initial behavior:
|
||||
|
||||
- configure DeckLink output without starting scheduled playback
|
||||
- start the render cadence producer
|
||||
- render warmup frames at the selected cadence, not faster
|
||||
- wait until completed-frame depth reaches `targetWarmupFrames`
|
||||
- schedule those completed frames as DeckLink preroll
|
||||
- call `StartScheduledPlayback()`
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- startup does not require the render producer to catch up by rendering faster than cadence
|
||||
- DeckLink begins playback with a real completed-frame buffer
|
||||
- if warmup cannot fill within a bounded timeout, startup enters degraded state with telemetry
|
||||
|
||||
### Step 5: Make DeckLink Scheduler A Separate Top-Up Loop
|
||||
|
||||
Create a scheduler loop that consumes completed frames.
|
||||
|
||||
Initial behavior:
|
||||
|
||||
- wake on completion, completed-frame publish, and periodic safety timer
|
||||
- top up actual DeckLink buffer to target
|
||||
- schedule only completed system-memory frames
|
||||
- do not render or black-fill during normal steady state
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- producer and DeckLink scheduler are separate loops
|
||||
- one component owns schedule time
|
||||
|
||||
### Step 6: Remove Synthetic Catch-Up From Steady State
|
||||
|
||||
Disable catch-up frame skipping for proactive mode.
|
||||
|
||||
Replacement:
|
||||
|
||||
- render cadence may skip render ticks if the renderer is late
|
||||
- completed queue may drop oldest or newest according to explicit policy
|
||||
- DeckLink schedule time remains continuous
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- scheduled stream time advances one frame per scheduled frame unless emergency recovery is explicitly enabled
|
||||
|
||||
### Step 7: Prioritize Output Render Work
|
||||
|
||||
Reduce render-thread interference.
|
||||
|
||||
Deliverables:
|
||||
|
||||
- output render commands outrank preview present
|
||||
- preview skipped/deferred count is visible
|
||||
- input upload timing is measured separately
|
||||
- screenshot/readback cannot block output cadence unless explicitly requested
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- focus changes and preview present do not drain playout buffer
|
||||
|
||||
### Step 8: Tune Thread Priority And Wait Strategy
|
||||
|
||||
Only after ownership is separated, tune scheduling.
|
||||
|
||||
Deliverables:
|
||||
|
||||
- set render cadence and DeckLink scheduler threads to appropriate Windows priorities
|
||||
- avoid busy spinning
|
||||
- use waitable timers or high-resolution waits where useful
|
||||
- record wake jitter
|
||||
|
||||
Exit criteria:
|
||||
|
||||
- cadence jitter is measurable and bounded
|
||||
|
||||
## Telemetry
|
||||
|
||||
Add or clarify:
|
||||
|
||||
- `renderCadence.targetFps`
|
||||
- `renderCadence.frameIndex`
|
||||
- `renderCadence.lateMs`
|
||||
- `renderCadence.maxLateMs`
|
||||
- `renderCadence.skippedTicks`
|
||||
- `completedFrames.depth`
|
||||
- `completedFrames.capacity`
|
||||
- `completedFrames.underruns`
|
||||
- `systemMemory.free`
|
||||
- `systemMemory.rendering`
|
||||
- `systemMemory.completed`
|
||||
- `systemMemory.scheduled`
|
||||
- `decklink.actualBufferedFrames`
|
||||
- `decklink.targetBufferedFrames`
|
||||
- `decklink.scheduleCallMs`
|
||||
- `decklink.scheduleFailures`
|
||||
- `decklink.completionIntervalMs`
|
||||
- `decklink.lateFrames`
|
||||
- `decklink.droppedFrames`
|
||||
- `scheduler.syntheticLeadFrames`
|
||||
|
||||
## Risks
|
||||
|
||||
- A cadence thread can render frames that DeckLink later drops if scheduling is wrong.
|
||||
- Too much buffering adds latency.
|
||||
- Too little buffering exposes Windows scheduling jitter.
|
||||
- If output render and input upload still share one GL thread, render cadence can still be disturbed by uploads.
|
||||
- Actual DeckLink buffer telemetry may differ from app-owned scheduled-slot counts.
|
||||
|
||||
## Exit Criteria
|
||||
|
||||
Phase 7.7 is complete when:
|
||||
|
||||
- output rendering is driven by a render cadence controller
|
||||
- DeckLink completion callbacks do not render
|
||||
- DeckLink scheduling is owned by a scheduler/top-up loop
|
||||
- system-memory completed frames are the only contract between render and DeckLink scheduling
|
||||
- real DeckLink buffered-frame count is visible
|
||||
- synthetic schedule lead no longer drives normal recovery
|
||||
- black fallback is startup/emergency only
|
||||
- playback can be tested with 4-frame and larger buffers without changing ownership logic
|
||||
@@ -1,398 +0,0 @@
|
||||
# Phase 7 Design: Backend Lifecycle And Playout
|
||||
|
||||
This document expands Phase 7 of [ARCHITECTURE_RESILIENCE_REVIEW.md](./ARCHITECTURE_RESILIENCE_REVIEW.md) into a concrete design target.
|
||||
|
||||
Phase 4 made the render thread the sole owner of normal runtime GL work. Phase 7 Step 4 moved DeckLink completion processing onto a backend worker, so the callback no longer directly waits for render-thread output production. Phase 7 Step 5 added a bounded ready-frame queue inside that worker, so scheduling now consumes completed output frames and falls back explicitly on underrun. Phase 7 should make backend lifecycle, buffer policy, playout headroom, and recovery explicit.
|
||||
|
||||
Phase 5 clarified that live parameter layering stops at final render-state composition. Phase 7 should keep backend lifecycle, output queue ownership, buffer reuse, temporal/feedback resources, and stale-frame/underrun policy outside the persisted/committed/transient parameter model.
|
||||
|
||||
## Status
|
||||
|
||||
- Phase 7 design package: proposed.
|
||||
- Phase 7 implementation: complete.
|
||||
- Current alignment: `VideoBackend`, `VideoIODevice`, `DeckLinkSession`, `VideoBackendLifecycle`, and `VideoPlayoutScheduler` exist. Phase 4 removed callback-thread GL ownership, Step 4 moved completion processing onto a backend worker, Step 5 uses `RenderOutputQueue` as the ready-frame handoff inside that worker, Step 6 replaces fixed late/drop skip-ahead with measured recovery decisions, and Step 7 reports backend playout health through `HealthTelemetry`.
|
||||
|
||||
Current backend footholds:
|
||||
|
||||
- `VideoBackend` wraps device discovery/configuration, start/stop, input callback handling, output completion handling, and telemetry publication.
|
||||
- `DeckLinkSession` owns DeckLink device handles, frame pool creation, preroll, keyer configuration, and scheduled playback.
|
||||
- `VideoPlayoutPolicy` names current frame pool, preroll, ready-frame, underrun, and catch-up policy defaults.
|
||||
- `RenderOutputQueue` names the future bounded ready-output-frame handoff and has pure queue tests.
|
||||
- `VideoPlayoutScheduler` owns schedule time generation, completion indexing, late/drop streaks, ready-queue pressure input, and measured recovery decisions.
|
||||
- `OpenGLVideoIOBridge` is the current adapter between `VideoBackend` and `RenderEngine`.
|
||||
- `HealthTelemetry` receives signal, render, pacing, lifecycle, queue, underrun, late/drop, and scheduler recovery observations.
|
||||
|
||||
## Why Phase 7 Exists
|
||||
|
||||
The current output path works only while render/readback stays comfortably inside budget. A late render can make the callback late, which reduces device-side headroom, which makes the next callback more fragile.
|
||||
|
||||
The resilience review calls this the main remaining live-resilience risk after Phase 4:
|
||||
|
||||
- output playout is still effectively filled on demand by a backend completion worker, but scheduling now consumes a bounded ready-frame queue
|
||||
- buffer pool size and preroll depth are not sourced from one policy
|
||||
- late/dropped recovery is a fixed skip rule
|
||||
- backend lifecycle is imperative rather than represented as explicit states
|
||||
|
||||
Phase 7 should separate hardware timing from render production.
|
||||
|
||||
## Goals
|
||||
|
||||
Phase 7 should establish:
|
||||
|
||||
- explicit backend lifecycle states and allowed transitions
|
||||
- one playout policy for frame pool size, preroll, headroom, and underrun behavior
|
||||
- a bounded producer/consumer output queue between render and DeckLink scheduling
|
||||
- lightweight DeckLink callbacks that dequeue/schedule/account rather than render
|
||||
- measured recovery from late/dropped frames
|
||||
- structured backend health reporting
|
||||
- tests for scheduler, queue, lifecycle, and underrun policy without DeckLink hardware
|
||||
|
||||
## Non-Goals
|
||||
|
||||
Phase 7 should not require:
|
||||
|
||||
- a new renderer
|
||||
- changing shader/state composition
|
||||
- changing committed-live or transient automation layering
|
||||
- replacing DeckLink support with multiple backends
|
||||
- full telemetry UI redesign
|
||||
- removing every synchronous API immediately
|
||||
- perfect adaptive latency policy in the first pass
|
||||
|
||||
## Target Timing Model
|
||||
|
||||
The target model is producer/consumer playout:
|
||||
|
||||
```text
|
||||
RenderEngine/render scheduler produces completed output frames
|
||||
-> bounded ready-frame queue
|
||||
-> VideoBackend consumes ready frames
|
||||
-> DeckLink callback schedules already-prepared frames
|
||||
```
|
||||
|
||||
The callback should not wait for rendering. It should:
|
||||
|
||||
- record completion result
|
||||
- recycle/release completed buffers
|
||||
- dequeue a ready frame or apply underrun policy
|
||||
- schedule the next frame
|
||||
- publish backend timing/health observations
|
||||
|
||||
The queue contains rendered output-frame ownership and scheduling metadata, not live parameter state. Parameter composition should already be resolved before an output frame enters this playout boundary.
|
||||
|
||||
## Target Lifecycle Model
|
||||
|
||||
Suggested backend states:
|
||||
|
||||
1. `Uninitialized`
|
||||
2. `Discovering`
|
||||
3. `Discovered`
|
||||
4. `Configuring`
|
||||
5. `Configured`
|
||||
6. `Prerolling`
|
||||
7. `Running`
|
||||
8. `Degraded`
|
||||
9. `Stopping`
|
||||
10. `Stopped`
|
||||
11. `Failed`
|
||||
|
||||
Suggested transition rules:
|
||||
|
||||
- `Uninitialized -> Discovering`
|
||||
- `Discovering -> Discovered | Failed`
|
||||
- `Discovered -> Configuring | Stopped`
|
||||
- `Configuring -> Configured | Failed`
|
||||
- `Configured -> Prerolling | Stopped`
|
||||
- `Prerolling -> Running | Failed | Stopping`
|
||||
- `Running -> Degraded | Stopping | Failed`
|
||||
- `Degraded -> Running | Stopping | Failed`
|
||||
- `Stopping -> Stopped`
|
||||
|
||||
The exact enum can change, but the lifecycle should become observable and testable.
|
||||
|
||||
## Proposed Collaborators
|
||||
|
||||
### `VideoBackendStateMachine`
|
||||
|
||||
Pure or mostly pure lifecycle transition helper.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- validate state transitions
|
||||
- produce transition observations
|
||||
- track failure reasons
|
||||
- keep start/stop/recovery behavior auditable
|
||||
|
||||
Non-responsibilities:
|
||||
|
||||
- DeckLink API calls
|
||||
- rendering
|
||||
- persistence
|
||||
|
||||
### `PlayoutPolicy`
|
||||
|
||||
Policy object for queue and timing behavior.
|
||||
|
||||
Expected fields:
|
||||
|
||||
- target preroll frames
|
||||
- maximum ready frames
|
||||
- minimum spare device buffers
|
||||
- underrun behavior
|
||||
- maximum catch-up frames
|
||||
- adaptive headroom enabled/disabled
|
||||
|
||||
### `RenderOutputQueue`
|
||||
|
||||
Bounded queue or ring for completed output frames.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- accept completed render outputs
|
||||
- expose ready frames for scheduling
|
||||
- track depth, drops, stale reuse, and underruns
|
||||
- keep ownership/lifetime clear between render and backend
|
||||
|
||||
### `OutputFramePool`
|
||||
|
||||
Backend-owned device buffer pool.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- own DeckLink mutable frames
|
||||
- expose available buffers for render/readback or scheduling
|
||||
- recycle completed frames
|
||||
- report spare-buffer depth
|
||||
|
||||
### `PlayoutController`
|
||||
|
||||
Coordinates policy, ready frames, device schedule times, and completion accounting.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- preroll frames
|
||||
- schedule next frame
|
||||
- handle late/drop/completed/flushed results
|
||||
- apply underrun policy
|
||||
- publish timing state
|
||||
|
||||
## Output Queue Policy
|
||||
|
||||
The initial output queue should be small and bounded.
|
||||
|
||||
Candidate defaults:
|
||||
|
||||
- target ready frames: 2-3
|
||||
- max ready frames: 3-5
|
||||
- underrun: reuse last completed frame if available, otherwise black
|
||||
- late/drop: increase degraded counters and optionally increase headroom within limits
|
||||
|
||||
The exact numbers should be measured, but the policy should live in one place instead of being split across constants.
|
||||
|
||||
## Underrun Policy
|
||||
|
||||
When no fresh rendered frame is available, options are:
|
||||
|
||||
1. reuse newest completed frame
|
||||
2. reuse last scheduled frame
|
||||
3. schedule black/degraded frame
|
||||
4. skip/catch up schedule time
|
||||
|
||||
Phase 7 should pick one default and make it visible in telemetry. Reusing the newest completed frame is often the best first policy for live visual continuity, but key/fill behavior may require careful testing.
|
||||
|
||||
## Migration Plan
|
||||
|
||||
### Step 1. Name Lifecycle States
|
||||
|
||||
Introduce backend state enum and transition reporting without changing scheduling behavior much.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] state changes are explicit
|
||||
- [x] invalid transitions are detectable
|
||||
- [x] tests cover allowed transitions
|
||||
|
||||
Current implementation:
|
||||
|
||||
- `VideoBackendLifecycle` names backend states and validates allowed transitions.
|
||||
- `VideoBackend` applies lifecycle transitions around discovery, configuration, start, stop, degradation, failure, and resource release.
|
||||
- Existing `BackendStateChangedEvent` publication now uses lifecycle state names for backend lifecycle observations.
|
||||
- `VideoBackendLifecycleTests` cover allowed transitions, rejected invalid transitions, failure reasons, retry, and stable state names.
|
||||
|
||||
### Step 2. Create Playout Policy Object
|
||||
|
||||
Unify fixed constants and scheduler assumptions.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] frame pool size derives from policy
|
||||
- [x] preroll count derives from policy
|
||||
- [x] late/drop recovery reads policy
|
||||
|
||||
Current implementation:
|
||||
|
||||
- `VideoPlayoutPolicy` defines current output frame pool, preroll, ready-frame, spare-buffer, underrun, catch-up, and adaptive-headroom settings.
|
||||
- `DeckLinkSession` uses the policy for output frame pool creation and preroll count.
|
||||
- `VideoPlayoutScheduler` stores the policy and uses `lateOrDropCatchUpFrames` instead of a hard-coded `+2` recovery step.
|
||||
- `VideoPlayoutSchedulerTests` cover default compatibility behavior, policy-driven catch-up, and policy normalization.
|
||||
|
||||
### Step 3. Add Ready Output Queue
|
||||
|
||||
Introduce a bounded queue for completed output frames.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] pure queue tests
|
||||
- [x] explicit depth/underrun metrics
|
||||
- [x] no DeckLink dependency in queue tests
|
||||
|
||||
Current implementation:
|
||||
|
||||
- `RenderOutputQueue` owns a bounded FIFO of `RenderOutputFrame` values.
|
||||
- The queue is configured from `VideoPlayoutPolicy::maxReadyFrames`.
|
||||
- Queue metrics report depth, capacity, pushed, popped, dropped, and underrun counts.
|
||||
- Overflow drops the oldest ready frame, preserving the newest completed output for scheduling.
|
||||
- `RenderOutputQueueTests` cover ordering, bounded overflow, underrun counting, and capacity shrink behavior without DeckLink hardware.
|
||||
|
||||
### Step 4. Move Callback Toward Dequeue/Schedule
|
||||
|
||||
Stop producing frames directly in the completion callback path.
|
||||
|
||||
Transitional target:
|
||||
|
||||
- [x] callback wakes/schedules a backend worker
|
||||
- [x] worker consumes ready frames
|
||||
|
||||
Final target:
|
||||
|
||||
- callback only records, recycles, dequeues, schedules
|
||||
|
||||
Current implementation:
|
||||
|
||||
- `VideoBackend::HandleOutputFrameCompletion(...)` now enqueues completion work and wakes an output-completion worker.
|
||||
- The output-completion worker drains pending completions and runs the existing render/schedule path.
|
||||
- This preserves behavior while removing the direct callback-thread wait on render-thread output production.
|
||||
- Step 5 now makes this worker consume ready frames from `RenderOutputQueue`; Step 4 remains the boundary that keeps output completion callbacks from doing render production directly.
|
||||
|
||||
### Step 5. Make Render Produce Ahead
|
||||
|
||||
Teach render/output code to keep the ready queue filled to target headroom.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] render thread produces on demand until queue has target depth
|
||||
- [x] callback does not synchronously wait for fresh render
|
||||
- [x] stale/black fallback is explicit on underrun
|
||||
|
||||
Current implementation:
|
||||
|
||||
- The backend output-completion worker fills `RenderOutputQueue` to `VideoPlayoutPolicy::targetReadyFrames`.
|
||||
- Scheduling now pops a ready frame from `RenderOutputQueue` instead of directly scheduling the freshly rendered frame.
|
||||
- If no ready frame can be produced, the worker schedules an explicit black fallback frame and reports degraded lifecycle state.
|
||||
- This is still demand-filled by the backend worker; a future pass can make render production more proactive or timer/pressure driven.
|
||||
|
||||
### Step 6. Replace Fixed Late/Drop Recovery
|
||||
|
||||
Replace fixed `+2` schedule-index recovery with measured lag/headroom accounting.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] track scheduled index, completed index, queue depth, late streak, drop streak
|
||||
- [x] recovery decisions use measured lag
|
||||
|
||||
Current implementation:
|
||||
|
||||
- `VideoPlayoutRecoveryDecision` reports completion result, completed index, scheduled index, ready queue depth, scheduled lead, measured lag, catch-up frames, late streak, and drop streak.
|
||||
- `VideoPlayoutScheduler::AccountForCompletionResult(...)` now accepts ready queue depth and returns a recovery decision.
|
||||
- Recovery is measured from late/drop streaks, scheduled lead, and ready queue pressure, then capped by `VideoPlayoutPolicy::lateOrDropCatchUpFrames`.
|
||||
- `VideoBackend` passes the current ready queue depth into the video device completion-accounting call.
|
||||
- `VideoPlayoutSchedulerTests` cover measured late recovery, measured drop recovery, policy caps, completed-index tracking, and streak clearing.
|
||||
|
||||
### Step 7. Route Backend Health Structurally
|
||||
|
||||
Publish backend lifecycle, queue depth, underrun, late/drop, and degraded-state observations through `HealthTelemetry`.
|
||||
|
||||
Initial target:
|
||||
|
||||
- [x] backend lifecycle state is visible in health telemetry
|
||||
- [x] ready queue depth, capacity, drops, and underruns are visible
|
||||
- [x] late/drop streaks and scheduler recovery decisions are visible
|
||||
- [x] runtime-state JSON exposes the backend playout health snapshot
|
||||
|
||||
Current implementation:
|
||||
|
||||
- `HealthTelemetry::BackendPlayoutSnapshot` captures lifecycle state, completion result, ready queue metrics, scheduler indices, scheduled lead, measured lag, catch-up frames, late/drop streaks, aggregate late/drop/flushed counts, degraded state, and status message.
|
||||
- `VideoBackend::RecordBackendPlayoutHealth(...)` samples `RenderOutputQueue` metrics after each processed output completion and reports the latest scheduler recovery decision.
|
||||
- `RuntimeStatePresenter` publishes the snapshot as `backendPlayout`, including `readyQueue` and `recovery` sections.
|
||||
- `HealthTelemetryTests` cover backend playout health recording, try-record behavior, and inclusion in the full health snapshot.
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
Recommended tests:
|
||||
|
||||
- allowed lifecycle transitions pass
|
||||
- invalid lifecycle transitions fail
|
||||
- playout policy derives frame pool/preroll sizes consistently
|
||||
- output queue preserves ordering
|
||||
- bounded output queue rejects/drops according to policy
|
||||
- underrun reuses last frame or black according to policy
|
||||
- late/drop accounting updates degraded state
|
||||
- scheduler catch-up uses measured lag, not fixed skip
|
||||
- stop drains/recycles device-frame ownership in pure fakes
|
||||
|
||||
Useful homes:
|
||||
|
||||
- `VideoPlayoutSchedulerTests` for scheduler evolution
|
||||
- `VideoIODeviceFakeTests` for fake backend lifecycle
|
||||
- a new `VideoBackendStateMachineTests`
|
||||
- a new `RenderOutputQueueTests`
|
||||
|
||||
## Risks
|
||||
|
||||
### Latency Risk
|
||||
|
||||
More headroom means more latency. Phase 7 should make latency a visible policy choice.
|
||||
|
||||
### Buffer Lifetime Risk
|
||||
|
||||
Render and backend will share ownership boundaries around output buffers. Frame ownership must be explicit to avoid reuse while hardware still owns a frame.
|
||||
|
||||
### Underrun Policy Risk
|
||||
|
||||
Reusing stale frames can be visually acceptable, but wrong key/fill behavior may be worse than black. Test with real output.
|
||||
|
||||
### Callback Thread Risk
|
||||
|
||||
Even after decoupling render, callback work must stay small and bounded.
|
||||
|
||||
### Scope Risk
|
||||
|
||||
Backend lifecycle and playout queue are related, but either can grow large. Implement in small, testable slices.
|
||||
|
||||
## Phase 7 Exit Criteria
|
||||
|
||||
Phase 7 can be considered complete once the project can say:
|
||||
|
||||
- [x] backend lifecycle states and transitions are explicit
|
||||
- [x] playout policy owns preroll, pool size, headroom, and underrun behavior
|
||||
- [x] output callbacks no longer synchronously wait for render production
|
||||
- [x] render produces completed output frames into a bounded queue
|
||||
- [x] underrun behavior is explicit and observable
|
||||
- [x] late/drop recovery is measured rather than fixed skip-only
|
||||
- [x] backend health reports lifecycle, queue, underrun, late, and dropped state
|
||||
- [x] queue/lifecycle/scheduler behavior has non-DeckLink tests
|
||||
|
||||
## Open Questions
|
||||
|
||||
- What should the default ready-frame depth be at 30fps and 60fps?
|
||||
- Should underrun reuse last completed, last scheduled, or black?
|
||||
- Should output queue depth be user-configurable?
|
||||
- Should render cadence be driven by backend demand, a timer, or queue-fill pressure?
|
||||
- How should external keying influence stale-frame/black fallback?
|
||||
- Should input and output lifecycle states be separate endpoints under one backend shell?
|
||||
|
||||
## Short Version
|
||||
|
||||
Phase 7 should stop making DeckLink callbacks wait for render.
|
||||
|
||||
Render produces ahead into a bounded queue. The backend consumes ready frames according to explicit lifecycle and playout policy. Queue depth, underruns, late frames, dropped frames, and degraded states become measured and visible.
|
||||
@@ -1,367 +0,0 @@
|
||||
# Phase 8 Design: Health, Telemetry, And Operational Reporting
|
||||
|
||||
This document expands Phase 8 of [ARCHITECTURE_RESILIENCE_REVIEW.md](./ARCHITECTURE_RESILIENCE_REVIEW.md) into a concrete design target.
|
||||
|
||||
Earlier phases clarify subsystem ownership, state layering, render-thread ownership, persistence, and backend lifecycle. Phase 8 should make operational visibility match that architecture: structured health state, timing, counters, warnings, and logs should flow through one telemetry subsystem instead of scattered debug strings and ad hoc status fields.
|
||||
|
||||
## Status
|
||||
|
||||
- Phase 8 design package: proposed.
|
||||
- Phase 8 implementation: not started.
|
||||
- Current alignment: `HealthTelemetry` exists and already receives some render, signal, video IO, and pacing observations. Runtime events also carry some timing and backend observations. The remaining work is to make health/telemetry structured, comprehensive, bounded, and operator-facing.
|
||||
|
||||
Current telemetry footholds:
|
||||
|
||||
- `HealthTelemetry` owns basic signal, performance, frame pacing, and video IO status reporting.
|
||||
- `RuntimeEventDispatcher` publishes typed observations such as timing samples and backend state changes.
|
||||
- `RuntimeStatePresenter` includes some health/performance fields in runtime-state output.
|
||||
- Render and backend paths already collect some timing and late/drop counts.
|
||||
|
||||
## Why Phase 8 Exists
|
||||
|
||||
The app can detect many problems, but operational visibility is still fragmented:
|
||||
|
||||
- some failures show modal dialogs
|
||||
- some warnings go only to `OutputDebugStringA`
|
||||
- some timing lives in health telemetry
|
||||
- some observations are runtime events
|
||||
- UI-facing state combines operational state with runtime state
|
||||
- repeated warnings are not uniformly deduplicated, classified, or summarized
|
||||
|
||||
Live software needs to answer:
|
||||
|
||||
- what is healthy right now?
|
||||
- what is degraded but still running?
|
||||
- what recently failed?
|
||||
- which subsystem is under timing pressure?
|
||||
- what should an operator see versus what should an engineer debug?
|
||||
|
||||
## Goals
|
||||
|
||||
Phase 8 should establish:
|
||||
|
||||
- structured log entries with subsystem, severity, category, timestamp, and message
|
||||
- subsystem-scoped health states
|
||||
- bounded recent warning/error history
|
||||
- timing samples, counters, and gauges for render/control/backend/persistence
|
||||
- stable health snapshots for UI/diagnostics
|
||||
- direct debug-output paths wrapped by structured telemetry
|
||||
- low-overhead reporting from render and callback paths
|
||||
- tests for severity, deduplication, counters, snapshots, and bounded retention
|
||||
|
||||
## Non-Goals
|
||||
|
||||
Phase 8 should not require:
|
||||
|
||||
- a cloud telemetry service
|
||||
- external metrics database
|
||||
- a full UI redesign
|
||||
- automatic recovery policy owned by telemetry
|
||||
- unbounded logs or time-series storage
|
||||
- replacing every `MessageBoxA` on day one
|
||||
|
||||
Telemetry observes and reports. It does not become the control plane.
|
||||
|
||||
## Target Model
|
||||
|
||||
Suggested core model:
|
||||
|
||||
- `TelemetrySubsystem`
|
||||
- `TelemetrySeverity`
|
||||
- `TelemetryLogEntry`
|
||||
- `TelemetryWarningRecord`
|
||||
- `TelemetryCounter`
|
||||
- `TelemetryGauge`
|
||||
- `TelemetryTimingSample`
|
||||
- `SubsystemHealthState`
|
||||
- `HealthSnapshot`
|
||||
|
||||
Important distinction:
|
||||
|
||||
- raw observations are append/update operations
|
||||
- health snapshots are derived read models
|
||||
|
||||
## Health Domains
|
||||
|
||||
At minimum:
|
||||
|
||||
- `ApplicationShell`
|
||||
- `RuntimeStore`
|
||||
- `RuntimeCoordinator`
|
||||
- `RuntimeSnapshotProvider`
|
||||
- `ControlServices`
|
||||
- `RenderEngine`
|
||||
- `VideoBackend`
|
||||
- `Persistence`
|
||||
|
||||
Suggested states:
|
||||
|
||||
- `Healthy`
|
||||
- `Warning`
|
||||
- `Degraded`
|
||||
- `Error`
|
||||
- `Unavailable`
|
||||
|
||||
The overall app health should be derived from subsystem states.
|
||||
|
||||
## Proposed Interfaces
|
||||
|
||||
### Write Interface
|
||||
|
||||
Target operations:
|
||||
|
||||
- `AppendLog(...)`
|
||||
- `RaiseWarning(...)`
|
||||
- `ClearWarning(...)`
|
||||
- `RecordCounterDelta(...)`
|
||||
- `RecordGauge(...)`
|
||||
- `RecordTimingSample(...)`
|
||||
- `ReportSubsystemState(...)`
|
||||
|
||||
Hot-path producers should be able to record observations cheaply and return.
|
||||
|
||||
### Read Interface
|
||||
|
||||
Target operations:
|
||||
|
||||
- `BuildHealthSnapshot()`
|
||||
- `GetSubsystemHealth(...)`
|
||||
- `GetActiveWarnings()`
|
||||
- `GetRecentLogs(...)`
|
||||
- `GetTimingSummary(...)`
|
||||
|
||||
UI/control services should consume snapshots, not scrape subsystem internals.
|
||||
|
||||
## Producer Expectations
|
||||
|
||||
### `RenderEngine`
|
||||
|
||||
Expected observations:
|
||||
|
||||
- render frame duration
|
||||
- input upload duration/count/drop/coalescing
|
||||
- output request latency
|
||||
- readback duration
|
||||
- synchronous readback fallback count
|
||||
- preview present cost/skips
|
||||
- wrong-thread diagnostics
|
||||
|
||||
### `VideoBackend`
|
||||
|
||||
Expected observations:
|
||||
|
||||
- lifecycle state
|
||||
- playout queue depth
|
||||
- output underruns
|
||||
- late/dropped/flushed/completed counts
|
||||
- input signal state
|
||||
- output model/mode status
|
||||
- spare buffer depth
|
||||
|
||||
### `ControlServices`
|
||||
|
||||
Expected observations:
|
||||
|
||||
- OSC decode errors
|
||||
- control request failures
|
||||
- websocket broadcast failures
|
||||
- ingress queue depth
|
||||
- file-watch/reload events
|
||||
- service start/stop state
|
||||
|
||||
### `RuntimeCoordinator`
|
||||
|
||||
Expected observations:
|
||||
|
||||
- rejected mutation count and reasons
|
||||
- reload requests
|
||||
- preset failures
|
||||
- transient-state invalidations
|
||||
- persistence request publication
|
||||
|
||||
### `RuntimeSnapshotProvider`
|
||||
|
||||
Expected observations:
|
||||
|
||||
- snapshot publish duration
|
||||
- snapshot version churn
|
||||
- stale snapshot/fallback behavior
|
||||
- publish failures
|
||||
|
||||
### `PersistenceWriter`
|
||||
|
||||
Expected observations:
|
||||
|
||||
- pending write count
|
||||
- coalesced write count
|
||||
- write duration
|
||||
- write failure
|
||||
- unsaved durable changes
|
||||
- shutdown flush result
|
||||
|
||||
## Logging Policy
|
||||
|
||||
Direct string logging can remain as an output sink, but not as the source of truth.
|
||||
|
||||
Target flow:
|
||||
|
||||
```text
|
||||
subsystem reports structured warning/log
|
||||
-> HealthTelemetry stores bounded structured entry
|
||||
-> optional debug sink prints text
|
||||
-> UI/diagnostics reads health snapshot
|
||||
```
|
||||
|
||||
Repeated warnings should be deduplicated by key while preserving counts and last-seen timestamps.
|
||||
|
||||
## Snapshot Contract
|
||||
|
||||
`HealthSnapshot` should answer:
|
||||
|
||||
- overall health
|
||||
- subsystem health states
|
||||
- active warnings
|
||||
- recent important logs
|
||||
- key counters
|
||||
- key timing summaries
|
||||
- degraded-state reasons
|
||||
|
||||
The snapshot should avoid copying durable runtime truth. Runtime state and health state can be published together by `ControlServices`, but they should remain separate read models.
|
||||
|
||||
## Migration Plan
|
||||
|
||||
### Step 1. Expand Health Model Types
|
||||
|
||||
Add structured subsystem/severity/category types and snapshot models.
|
||||
|
||||
Initial target:
|
||||
|
||||
- keep existing health fields
|
||||
- add structured warning/log/counter/gauge containers
|
||||
- add tests for bounded retention and deduplication
|
||||
|
||||
### Step 2. Wrap Direct Warning Paths
|
||||
|
||||
Route common direct logs through telemetry first.
|
||||
|
||||
Initial candidates:
|
||||
|
||||
- backend fallback warnings
|
||||
- screenshot write failures
|
||||
- OSC decode/dispatch failures
|
||||
- render-thread request failures
|
||||
|
||||
### Step 3. Add Subsystem Health States
|
||||
|
||||
Let subsystems report state transitions.
|
||||
|
||||
Initial target:
|
||||
|
||||
- `RenderEngine`: healthy/degraded on render-thread request failures
|
||||
- `VideoBackend`: configured/running/degraded/no-input/dropping
|
||||
- `ControlServices`: running/degraded/stopped
|
||||
- `Persistence`: clean/pending/error
|
||||
|
||||
### Step 4. Split Timing Into Named Metrics
|
||||
|
||||
Move from broad timing fields to named samples/gauges.
|
||||
|
||||
Initial target:
|
||||
|
||||
- render duration
|
||||
- readback duration/fallback count
|
||||
- output request latency
|
||||
- playout completion interval
|
||||
- event queue depth
|
||||
- persistence write duration
|
||||
|
||||
### Step 5. Publish Health Snapshot
|
||||
|
||||
Expose `HealthTelemetry` snapshot through control/runtime presentation.
|
||||
|
||||
Initial target:
|
||||
|
||||
- UI can distinguish runtime state from operational health
|
||||
- active warnings are visible
|
||||
- recent degraded reasons are visible
|
||||
|
||||
### Step 6. Add Operational Tests
|
||||
|
||||
Cover:
|
||||
|
||||
- warning raise/clear
|
||||
- repeated warning coalescing
|
||||
- counter/gauge updates
|
||||
- health derivation
|
||||
- bounded log retention
|
||||
- snapshot stability
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
Recommended tests:
|
||||
|
||||
- warning raised appears in active warnings
|
||||
- warning clear removes active warning but preserves history
|
||||
- repeated warning increments count and updates last-seen time
|
||||
- bounded log keeps newest entries
|
||||
- subsystem `Error` makes overall health `Error`
|
||||
- subsystem `Degraded` makes overall health degraded if no error exists
|
||||
- timing sample updates summary
|
||||
- counter delta accumulates
|
||||
- health snapshot is read-only/stable
|
||||
|
||||
Useful homes:
|
||||
|
||||
- `HealthTelemetryTests`
|
||||
- `RuntimeEventTypeTests` for observation event payloads
|
||||
- future integration tests for control-service health publication
|
||||
|
||||
## Risks
|
||||
|
||||
### Telemetry Becomes Behavior
|
||||
|
||||
Telemetry must not become the hidden way subsystems command each other. It reports. Subsystems own mitigation.
|
||||
|
||||
### Too Much Hot-Path Cost
|
||||
|
||||
Render and callback paths need cheap writes. Use bounded structures and avoid expensive formatting on hot paths.
|
||||
|
||||
### String-Only Logging
|
||||
|
||||
Centralizing strings is not enough. Severity, subsystem, category, and structured fields should be first-class.
|
||||
|
||||
### Snapshot Bloat
|
||||
|
||||
Health snapshots should summarize operational state, not duplicate full runtime/project state.
|
||||
|
||||
### Alert Noise
|
||||
|
||||
Without deduplication and severity discipline, operator-facing health can become noisy and ignored.
|
||||
|
||||
## Phase 8 Exit Criteria
|
||||
|
||||
Phase 8 can be considered complete once the project can say:
|
||||
|
||||
- [ ] major subsystems publish structured health/telemetry observations
|
||||
- [ ] active warnings and recent logs are structured and bounded
|
||||
- [ ] subsystem health states roll up to an overall health state
|
||||
- [ ] render/backend/control/persistence timing metrics are named and visible
|
||||
- [ ] direct debug-string warning paths are wrapped or retired for major cases
|
||||
- [ ] UI/control diagnostics can consume a stable health snapshot
|
||||
- [ ] telemetry write paths are cheap enough for render/callback use
|
||||
- [ ] telemetry behavior has focused tests
|
||||
|
||||
## Open Questions
|
||||
|
||||
- Should debug output remain enabled by default as a telemetry sink?
|
||||
- How many recent logs/warnings should be retained in memory?
|
||||
- Should timing summaries store raw samples, rolling windows, or both?
|
||||
- Should warning thresholds be declared centrally or owned by each subsystem?
|
||||
- Should health snapshots be published with runtime state or on a separate endpoint/channel?
|
||||
- Should logs eventually be written to disk, and if so, through Phase 6 persistence infrastructure or a separate log sink?
|
||||
|
||||
## Short Version
|
||||
|
||||
Phase 8 should make the app diagnosable.
|
||||
|
||||
Subsystems report structured observations. `HealthTelemetry` records bounded logs, warnings, counters, gauges, timing, and subsystem states. UI and diagnostics consume stable health snapshots. Debug strings become a sink, not the source of truth.
|
||||
@@ -1,70 +1,44 @@
|
||||
# Phase 1 Subsystem Design Index
|
||||
# Subsystem Notes Index
|
||||
|
||||
This directory contains the subsystem-specific design notes for Phase 1 of the architecture roadmap.
|
||||
The current, phase-free architecture summary is:
|
||||
|
||||
Start here if you want the Phase 1 package to read as one coherent deliverable rather than as separate subsystem writeups.
|
||||
- [Current System Architecture](../CURRENT_SYSTEM_ARCHITECTURE.md)
|
||||
|
||||
Parent documents:
|
||||
Start there when you want to understand how the application works now.
|
||||
|
||||
- [Architecture Resilience Review](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/ARCHITECTURE_RESILIENCE_REVIEW.md)
|
||||
- [Phase 1: Subsystem Boundaries and Target Architecture](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/PHASE_1_SUBSYSTEM_BOUNDARIES_DESIGN.md)
|
||||
|
||||
## How This Set Fits Together
|
||||
|
||||
- [PHASE_1_SUBSYSTEM_BOUNDARIES_DESIGN.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/PHASE_1_SUBSYSTEM_BOUNDARIES_DESIGN.md) defines the top-level subsystem split, dependency rules, state categories, and migration guardrails.
|
||||
- The notes in this directory expand each subsystem boundary without changing the parent Phase 1 design.
|
||||
- The subsystem notes are meant to be read as design companions, not as independent alternate architectures.
|
||||
|
||||
Status note:
|
||||
|
||||
- The Phase 1 design package is complete.
|
||||
- The runtime implementation foothold is complete: the named runtime subsystems exist in code, `RuntimeHost` is retired from the compiled runtime path, and subsystem tests cover the new seams.
|
||||
- The whole app is not fully extracted yet, so these notes still describe the architecture later phases should continue toward.
|
||||
This directory contains deeper notes for individual subsystem boundaries. These notes were originally written during the phased architecture work, so some files may still mention migration steps or target-state language. Treat them as companion notes, not as the source of truth when they disagree with the current architecture summary.
|
||||
|
||||
## Recommended Reading Order
|
||||
|
||||
1. [PHASE_1_SUBSYSTEM_BOUNDARIES_DESIGN.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/PHASE_1_SUBSYSTEM_BOUNDARIES_DESIGN.md)
|
||||
2. [RuntimeStore.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/RuntimeStore.md)
|
||||
3. [RuntimeCoordinator.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/RuntimeCoordinator.md)
|
||||
4. [RuntimeSnapshotProvider.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/RuntimeSnapshotProvider.md)
|
||||
5. [ControlServices.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/ControlServices.md)
|
||||
6. [RenderEngine.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/RenderEngine.md)
|
||||
7. [VideoBackend.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/VideoBackend.md)
|
||||
8. [HealthTelemetry.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/HealthTelemetry.md)
|
||||
1. [Current System Architecture](../CURRENT_SYSTEM_ARCHITECTURE.md)
|
||||
2. [RuntimeStore](RuntimeStore.md)
|
||||
3. [RuntimeCoordinator](RuntimeCoordinator.md)
|
||||
4. [RuntimeSnapshotProvider](RuntimeSnapshotProvider.md)
|
||||
5. [ControlServices](ControlServices.md)
|
||||
6. [RenderEngine](RenderEngine.md)
|
||||
7. [VideoBackend](VideoBackend.md)
|
||||
8. [HealthTelemetry](HealthTelemetry.md)
|
||||
|
||||
That order mirrors the intended dependency story:
|
||||
That order follows the current ownership story:
|
||||
|
||||
- durable state first
|
||||
- mutation and publication next
|
||||
- ingress and render boundaries after that
|
||||
- device timing and operational visibility last
|
||||
- control ingress after that
|
||||
- render ownership and video timing next
|
||||
- operational visibility last
|
||||
|
||||
## Subsystem Notes
|
||||
|
||||
- [RuntimeStore.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/RuntimeStore.md)
|
||||
Durable runtime-state facade over layer-stack, config, package-catalog, presentation, and persistence boundaries.
|
||||
- [RuntimeCoordinator.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/RuntimeCoordinator.md)
|
||||
Mutation validation, state classification, reset/reload policy, and publication/persistence requests.
|
||||
- [RuntimeSnapshotProvider.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/RuntimeSnapshotProvider.md)
|
||||
Render-facing snapshot publication boundary backed by explicit render snapshot building/versioning.
|
||||
- [ControlServices.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/ControlServices.md)
|
||||
OSC, HTTP/WebSocket, and file-watch ingress plus normalization and service-local buffering.
|
||||
- [RenderEngine.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/RenderEngine.md)
|
||||
Sole-owner render/GL boundary, render-local transient state, preview, and playout-ready frame production.
|
||||
- [VideoBackend.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/VideoBackend.md)
|
||||
Device lifecycle, input/output pacing, buffer policy, and producer/consumer playout direction.
|
||||
- [HealthTelemetry.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/subsystems/HealthTelemetry.md)
|
||||
Logs, warnings, counters, timing traces, and subsystem health snapshots.
|
||||
- [RuntimeStore](RuntimeStore.md): durable runtime-state facade over layer-stack, config, package-catalog, presentation, and persistence boundaries.
|
||||
- [RuntimeCoordinator](RuntimeCoordinator.md): mutation validation, state classification, reset/reload policy, and publication/persistence requests.
|
||||
- [RuntimeSnapshotProvider](RuntimeSnapshotProvider.md): render-facing snapshot publication boundary backed by explicit render snapshot building/versioning.
|
||||
- [ControlServices](ControlServices.md): OSC, HTTP/WebSocket, and file-watch ingress plus normalization and service-local buffering.
|
||||
- [RenderEngine](RenderEngine.md): GL ownership boundary, render-local transient state, preview, and playout-ready frame production.
|
||||
- [VideoBackend](VideoBackend.md): device lifecycle, input/output pacing, buffer policy, and producer/consumer playout behavior.
|
||||
- [HealthTelemetry](HealthTelemetry.md): logs, warnings, counters, timing traces, and subsystem health snapshots.
|
||||
|
||||
## What Phase 1 Should Settle
|
||||
## Historical Documents
|
||||
|
||||
Phase 1 should leave the project with:
|
||||
The `docs/PHASE_*` files and experiment logs record how the architecture evolved. They are useful when you need rationale, investigation history, or rejected paths, but they are no longer arranged as the main feature split for the app.
|
||||
|
||||
- one agreed subsystem vocabulary
|
||||
- one agreed dependency direction map
|
||||
- one agreed state-category model
|
||||
- one agreed current-to-target migration story
|
||||
|
||||
Phase 1 does not need to settle every later implementation detail. The subsystem notes intentionally leave some questions open where later phases need room to choose concrete mechanics.
|
||||
|
||||
As of the current codebase, those design questions are settled well enough for later work to build against them. Remaining implementation work should be tracked under later phases, especially eventing, render-thread ownership, persistence, backend lifecycle, live-state layering, and telemetry.
|
||||
For current implementation work, use [Current System Architecture](../CURRENT_SYSTEM_ARCHITECTURE.md) as the entry point and only dip into the phase documents when you need context for why a subsystem ended up this way.
|
||||
|
||||
@@ -54,8 +54,15 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Start() override
|
||||
bool PrepareOutputSchedule() override
|
||||
{
|
||||
mPreparedOutputSchedule = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StartInputStreams() override
|
||||
{
|
||||
mInputStreamsStarted = true;
|
||||
mState.hasInputSource = true;
|
||||
VideoIOFrame input;
|
||||
input.bytes = mInputBytes.data();
|
||||
@@ -65,11 +72,22 @@ public:
|
||||
input.pixelFormat = mState.inputPixelFormat;
|
||||
if (mInputCallback)
|
||||
mInputCallback(input);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool StartScheduledPlayback() override
|
||||
{
|
||||
mScheduledPlaybackStarted = true;
|
||||
if (mOutputCallback)
|
||||
mOutputCallback(VideoIOCompletion{ VideoIOCompletionResult::Completed });
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Start() override
|
||||
{
|
||||
return PrepareOutputSchedule() && StartInputStreams() && StartScheduledPlayback();
|
||||
}
|
||||
|
||||
bool Stop() override { return true; }
|
||||
const VideoIOState& State() const override { return mState; }
|
||||
VideoIOState& MutableState() override { return mState; }
|
||||
@@ -103,6 +121,9 @@ public:
|
||||
}
|
||||
|
||||
unsigned ScheduledFrames() const { return mScheduledFrames; }
|
||||
bool PreparedOutputSchedule() const { return mPreparedOutputSchedule; }
|
||||
bool InputStreamsStarted() const { return mInputStreamsStarted; }
|
||||
bool ScheduledPlaybackStarted() const { return mScheduledPlaybackStarted; }
|
||||
VideoIOCompletionResult LastCompletion() const { return mLastCompletion; }
|
||||
uint64_t LastReadyQueueDepth() const { return mLastReadyQueueDepth; }
|
||||
|
||||
@@ -113,6 +134,9 @@ private:
|
||||
std::array<unsigned char, 3840> mInputBytes = {};
|
||||
std::array<unsigned char, 7680> mOutputBytes = {};
|
||||
unsigned mScheduledFrames = 0;
|
||||
bool mPreparedOutputSchedule = false;
|
||||
bool mInputStreamsStarted = false;
|
||||
bool mScheduledPlaybackStarted = false;
|
||||
VideoIOCompletionResult mLastCompletion = VideoIOCompletionResult::Unknown;
|
||||
uint64_t mLastReadyQueueDepth = 0;
|
||||
};
|
||||
@@ -144,6 +168,9 @@ int main()
|
||||
|
||||
Expect(inputSeen, "fake input callback emits generic frame");
|
||||
Expect(outputSeen, "fake output callback emits generic completion");
|
||||
Expect(device.PreparedOutputSchedule(), "fake output schedule was prepared");
|
||||
Expect(device.InputStreamsStarted(), "fake input streams started");
|
||||
Expect(device.ScheduledPlaybackStarted(), "fake scheduled playback started");
|
||||
Expect(device.ScheduledFrames() == 1, "fake backend schedules one frame");
|
||||
Expect(device.LastCompletion() == VideoIOCompletionResult::Completed, "fake backend records generic completion");
|
||||
Expect(device.LastReadyQueueDepth() == 2, "fake backend records ready queue depth");
|
||||
|
||||
Reference in New Issue
Block a user