Phase 4 complete

2026-05-11 18:39:02 +10:00
parent f141d20026
commit 761df3b2d0
5 changed files with 68 additions and 97 deletions
--- a/apps/LoopThroughWithOpenGLCompositing/gl/RenderEngine.cpp
+++ b/apps/LoopThroughWithOpenGLCompositing/gl/RenderEngine.cpp
@@ -513,25 +513,6 @@ bool RenderEngine::QueueInputFrame(const VideoIOFrame& inputFrame, const VideoIO
 	return true;
 }

-bool RenderEngine::TryUploadInputFrame(const VideoIOFrame& inputFrame, const VideoIOState& videoState)
-{
-	if (inputFrame.hasNoInputSource || inputFrame.bytes == nullptr)
-		return true;
-
-	if (mRenderThreadRunning)
-	{
-		return TryInvokeOnRenderThread("input-upload", [this, inputFrame, videoState]() {
-			mRenderCommandQueue.RequestInputUpload({ inputFrame, videoState });
-			RenderInputUploadRequest request;
-			return mRenderCommandQueue.TryTakeInputUpload(request) &&
-				UploadInputFrameOnRenderThread(request.inputFrame, request.videoState);
-		});
-	}
-
-	ReportRenderThreadRequestFailure("input-upload", "render thread is not running");
-	return false;
-}
-
 bool RenderEngine::UploadInputFrameOnRenderThread(const VideoIOFrame& inputFrame, const VideoIOState& videoState)
 {
 	ReportWrongThreadRenderAccess("input-upload");
@@ -567,11 +548,6 @@ bool RenderEngine::RequestOutputFrame(const RenderPipelineFrameContext& context,
 	return false;
 }

-bool RenderEngine::RenderOutputFrame(const RenderPipelineFrameContext& context, VideoIOOutputFrame& outputFrame)
-{
-	return RequestOutputFrame(context, outputFrame);
-}
-
 bool RenderEngine::RenderOutputFrameOnRenderThread(const RenderPipelineFrameContext& context, VideoIOOutputFrame& outputFrame)
 {
 	ReportWrongThreadRenderAccess("output-render");
@@ -656,22 +632,6 @@ bool RenderEngine::ReadOutputFrameRgbaOnRenderThread(unsigned width, unsigned he
 	return true;
 }

-bool RenderEngine::CaptureOutputFrameRgbaTopDown(unsigned width, unsigned height, std::vector<unsigned char>& topDownPixels)
-{
-	if (mRenderThreadRunning)
-	{
-		return TryInvokeOnRenderThread("screenshot-capture", [this, width, height, &topDownPixels]() {
-			mRenderCommandQueue.RequestScreenshotCapture({ width, height });
-			RenderScreenshotCaptureRequest request;
-			return mRenderCommandQueue.TryTakeScreenshotCapture(request) &&
-				CaptureOutputFrameRgbaTopDownOnRenderThread(request.width, request.height, topDownPixels);
-		});
-	}
-
-	ReportRenderThreadRequestFailure("screenshot-capture", "render thread is not running");
-	return false;
-}
-
 bool RenderEngine::CaptureOutputFrameRgbaTopDownOnRenderThread(unsigned width, unsigned height, std::vector<unsigned char>& topDownPixels)
 {
 	std::vector<unsigned char> bottomUpPixels;
--- a/apps/LoopThroughWithOpenGLCompositing/gl/RenderEngine.h
+++ b/apps/LoopThroughWithOpenGLCompositing/gl/RenderEngine.h
@@ -102,15 +102,12 @@ public:
 	bool TryPresentPreview(bool force, unsigned previewFps, unsigned outputFrameWidth, unsigned outputFrameHeight);
 	bool RequestScreenshotCapture(unsigned width, unsigned height, ScreenshotCaptureCallback completion);
 	bool QueueInputFrame(const VideoIOFrame& inputFrame, const VideoIOState& videoState);
-	bool TryUploadInputFrame(const VideoIOFrame& inputFrame, const VideoIOState& videoState);
 	bool RequestOutputFrame(const RenderPipelineFrameContext& context, VideoIOOutputFrame& outputFrame);
-	bool RenderOutputFrame(const RenderPipelineFrameContext& context, VideoIOOutputFrame& outputFrame);
 	bool ResolveRenderFrameState(
 		const RenderFrameInput& input,
 		std::vector<OscOverlayCommitRequest>* commitRequests,
 		RenderFrameState& frameState);
 	void RenderPreparedFrame(const RenderFrameState& frameState);
-	bool CaptureOutputFrameRgbaTopDown(unsigned width, unsigned height, std::vector<unsigned char>& topDownPixels);

 private:
 	static constexpr std::chrono::milliseconds kRenderThreadRequestTimeout{ 250 };
--- a/docs/ARCHITECTURE_RESILIENCE_REVIEW.md
+++ b/docs/ARCHITECTURE_RESILIENCE_REVIEW.md
@@ -8,7 +8,7 @@ Phase checklist:
 - [x] Introduce an internal event model
 - [x] Split `RuntimeHost`
 - [x] Finish live-state and service-facing coordination
- [ ] Make the render thread the sole GL owner
+- [x] Make the render thread the sole GL owner
 - [ ] Refactor live state layering into an explicit composition model
 - [ ] Move persistence onto a background snapshot writer
 - [ ] Make DeckLink/backend lifecycle explicit with a state machine
@@ -18,8 +18,9 @@ Checklist note:

 - The checked Phase 1 item means the subsystem vocabulary, dependency direction, state categories, design package, and runtime implementation foothold are in place.
 - The checked Phase 2 item means the internal event model substrate is complete enough for later phases: the typed event vocabulary, app-owned dispatcher, coalesced event pump, reload bridge events, production bridges, and pure event tests are in place. Remaining items in [PHASE_2_INTERNAL_EVENT_MODEL_DESIGN.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/PHASE_2_INTERNAL_EVENT_MODEL_DESIGN.md) are narrow follow-ups, mainly completion/failure observations and later replacement of the runtime-store poll fallback with real file-watch events.
- The checked Phase 3 item means the render-facing state path now has named live-state, composition, frame-state, resolver, and service-bridge boundaries. `OpenGLComposite::renderEffect()` is reduced to runtime work, frame input construction, and frame rendering. This prepares Phase 4 but does not yet move GL work onto a dedicated render thread.
- It does not mean the whole app is fully extracted. Sole-owner render threading, deeper live-state layering, background persistence, backend lifecycle, and richer telemetry continue through later phases.
+- The checked Phase 3 item means the render-facing state path now has named live-state, composition, frame-state, resolver, and service-bridge boundaries. `OpenGLComposite::renderEffect()` is reduced to runtime work, frame input construction, and frame rendering.
+- The checked Phase 4 item means normal runtime GL work is now owned by a dedicated `RenderEngine` render thread. Input upload, output render, preview, screenshot capture, render-local resets, and shader application enter through render-thread queue/request paths instead of caller-thread context borrowing. The remaining output timing risk is callback-coupled synchronous output production, which is intentionally tracked for the later DeckLink/backend lifecycle and playout-queue work.
+- It does not mean the whole app is fully extracted. Deeper live-state layering, background persistence, backend lifecycle/playout queue policy, and richer telemetry continue through later phases.

 ## Timing Review

@@ -28,7 +29,7 @@ The recent OSC work removed several control-path stalls, but the app still has a
 - output playout is still effectively render-on-demand from the DeckLink completion callback
 - output buffering and preroll are now larger, but the buffering model is still static and only loosely related to actual render cost
 - GPU readback is partly asynchronous, but the fallback path still returns to synchronous readback on any miss
- preview presentation is still tied to the playout render path
+- preview presentation is best-effort and render-thread queued, but still shares the same render-thread budget as playout
 - background service timing is partially event-driven; runtime-store scanning still uses a bounded compatibility poll fallback

 Those points are important because they affect not just average performance, but how the app behaves under brief spikes, device jitter, or load bursts.
@@ -58,23 +59,23 @@ Recommended direction:
 - separate status/telemetry updates from control mutation paths
 - make render consume snapshots rather than sharing a large mutable authority object

-### 2. OpenGL ownership is still centralized behind one shared lock
+### 2. OpenGL ownership has moved to the render thread

-Even after recent timing improvements, preview, input upload, and playout rendering still rely on one shared GL context protected by one `CRITICAL_SECTION`.
+Phase 4 removed normal runtime dependence on the old shared GL `CRITICAL_SECTION`. `RenderEngine` now owns a dedicated render thread and binds the GL context there for normal input upload, output rendering, preview presentation, screenshot capture, shader application, and render-local reset work.

 Relevant code:

- [OpenGLComposite.h](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLComposite.h:93)
- [OpenGLComposite.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLComposite.cpp:253)
- [OpenGLVideoIOBridge.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/pipeline/OpenGLVideoIOBridge.cpp:70)
+- [RenderEngine.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/RenderEngine.cpp:36)
+- [OpenGLVideoIOBridge.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/pipeline/OpenGLVideoIOBridge.cpp:11)
+- [OpenGLComposite.cpp](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLComposite.cpp:168)

-This is still a central choke point and limits timing isolation.
+This removes cross-thread GL context borrowing as the central correctness model. The remaining timing risk is that output frame production is still synchronous from the DeckLink completion path, so a render/readback spike can still reduce playout headroom.

 Recommended direction:

- use one dedicated render thread as the sole GL owner
- have input/output/control threads queue work instead of performing GL work directly
- remove ad hoc GL use from callback threads
+- keep the render thread as the sole GL owner
+- replace synchronous output request/response with a bounded producer/consumer playout queue
+- keep preview and screenshot subordinate to output deadline pressure

 ### 3. Control flow is spread across polling and shared-memory patterns

@@ -179,7 +180,7 @@ Relevant timing code:

 Why this matters:

- the output completion path currently requests a scheduled render through `OpenGLVideoIOBridge::RenderScheduledFrame()`, which still takes the shared GL path, renders, reads back, and schedules the next frame in one callback-driven flow.
+- the output completion path currently requests a scheduled render through `OpenGLVideoIOBridge::RenderScheduledFrame()`, which asks the render thread to render/read back synchronously and then schedules the next frame in one callback-driven flow.
 - `VideoPlayoutScheduler::AccountForCompletionResult()` currently reacts to both late and dropped frames by blindly advancing the schedule index by `2`, which is simple but not especially robust.
 - `kPrerollFrameCount` is now `12`, but `DeckLinkSession::ConfigureOutput()` still creates a fixed pool of `10` mutable output frames. That mismatch suggests the buffering model is not being sized from one coherent source of truth.

@@ -203,8 +204,8 @@ That means the completion callback is currently responsible for:

 - frame pacing accounting
 - acquiring the next output buffer
- taking the GL critical section
- rendering the composite
+- requesting render-thread output production
+- waiting for render/readback completion
 - performing output readback
 - scheduling the next frame

@@ -286,7 +287,7 @@ Add lightweight tracing for:

 - input callback latency
 - input upload skip count
- GL lock wait time
+- render-thread request latency
 - render queue depth
 - render time
 - pass build/compile latency
@@ -502,6 +503,11 @@ Dedicated design note:

 - [PHASE_4_RENDER_THREAD_OWNERSHIP_DESIGN.md](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/docs/PHASE_4_RENDER_THREAD_OWNERSHIP_DESIGN.md)

+Status:
+
+- complete for GL ownership
+- remaining playout-headroom work is tracked under Phase 7/backend lifecycle
+
 Target behavior:

 - one thread owns the GL context
@@ -610,7 +616,6 @@ This phase should happen after the main ownership changes so the telemetry can r
 Recommended coverage:

 - render queue depth
- GL lock wait time, if any shared lock remains
 - input callback latency
 - input upload skip count
 - output scheduling lag
@@ -662,7 +667,7 @@ This order tries to avoid doing foundational work twice.

 ## Short Version

-The app is in a much better place than it was before the OSC timing work, but the main remaining architectural risk is still shared ownership around the shared GL path. The most sensible path forward is:
+The app is in a much better place than it was before the OSC timing work. The shared-GL ownership risk has now been addressed by Phase 4; the main remaining live-resilience risk is output playout headroom because DeckLink callbacks still synchronously request render-thread output production. The most sensible path forward is:

 1. define boundaries
 2. establish an event model
--- a/docs/PHASE_4_RENDER_THREAD_OWNERSHIP_DESIGN.md
+++ b/docs/PHASE_4_RENDER_THREAD_OWNERSHIP_DESIGN.md
@@ -6,13 +6,13 @@ Phase 1 named the subsystems. Phase 2 added the typed event substrate. Phase 3 m

 ## Status

- Phase 4 design package: proposed.
- Phase 4 implementation: Step 7 started. The existing synchronous `RenderEngine` entrypoints delegate their GL bodies to named `...OnRenderThread(...)` helpers, preview/screenshot/render-reset/input-upload/output-render requests pass through a small `RenderCommandQueue` compatibility mailbox, and `RenderEngine` now starts a dedicated render thread for normal runtime GL work.
- Current alignment: the repo has a named frame-state contract and cleaner render-state preparation. Normal runtime GL work is routed through the render thread after startup, while startup initialization still runs before the render thread is started.
+- Phase 4 design package: implemented.
+- Phase 4 implementation: complete for GL ownership. `RenderEngine` starts a dedicated render thread, owns the GL context during normal runtime work, and exposes queue/request entrypoints for input upload, output render, preview presentation, screenshot capture, shader rebuild application, and render-local resets.
+- Current alignment: normal runtime GL work is routed through the render thread after startup. Startup initialization still runs before the render thread starts while the app explicitly owns the context, and shutdown now stops DeckLink/backend work before destroying render-thread GL resources and deleting the context.

 Current GL ownership footholds:

- `RenderEngine` owns GL resources, a dedicated render thread, the current synchronous compatibility shims, a small render command mailbox, named render-thread helper methods, and wrong-thread diagnostics for those helpers.
+- `RenderEngine` owns GL resources, a dedicated render thread, synchronous request/response for output frames, a small render command mailbox, named render-thread helper methods, and wrong-thread diagnostics for those helpers.
 - `RenderFrameInput` / `RenderFrameState` provide the frame-state contract that a render thread can consume.
 - `RenderFrameStateResolver` prepares the render-facing layer state before drawing.
 - `OpenGLVideoIOBridge` calls `RenderEngine::QueueInputFrame(...)` from the input path and `RenderEngine::RequestOutputFrame(...)` from the output path.
@@ -43,7 +43,7 @@ Phase 4 should establish:
 - preview and screenshot requests become render-thread commands or consumers
 - `RenderFrameInput` / `RenderFrameState` become the stable data contract for frame production
 - GL context entrypoints are reduced to render-thread-only code paths
- tests for queue semantics, request coalescing, and lifecycle behavior without requiring DeckLink hardware
+- tests for queue semantics and request coalescing without requiring DeckLink hardware, plus explicit lifecycle ordering in code

 ## Non-Goals

@@ -64,10 +64,10 @@ The current code paths that matter most are:

 | Entry point | Current behavior | Phase 4 direction |
 | --- | --- | --- |
-| `RenderEngine::TryUploadInputFrame(...)` | synchronous compatibility shim; after render-thread startup it queues input upload work and waits for render-thread completion | enqueue latest input frame; render thread uploads without callback-owned GL |
+| `RenderEngine::QueueInputFrame(...)` | copies the latest input frame into the render mailbox and returns without waiting for GL | render thread uploads latest input without callback-owned GL |
 | `RenderEngine::RequestOutputFrame(...)` | synchronous output request; after render-thread startup it queues output render work and waits for render-thread completion with timeout/failure reporting | render thread executes output frame production |
-| `RenderEngine::TryPresentPreview(...)` | best-effort compatibility shim; after render-thread startup non-render callers queue preview presentation and return | render thread or preview presenter consumes latest completed frame |
-| `RenderEngine::CaptureOutputFrameRgbaTopDown(...)` | synchronous compatibility shim; after render-thread startup it queues screenshot readback and waits for render-thread completion | screenshot request becomes render-thread command |
+| `RenderEngine::TryPresentPreview(...)` | best-effort request; callers queue preview presentation and return | render thread consumes latest completed frame for preview |
+| `RenderEngine::RequestScreenshotCapture(...)` | queues screenshot capture on the render thread; the captured pixels are then handed to the async disk writer | screenshot capture is a render-thread command |
 | `OpenGLVideoIOBridge::UploadInputFrame(...)` | copies the latest input frame into the render mailbox and returns without waiting for GL | render thread uploads the latest queued input frame |
 | `OpenGLVideoIOBridge::RenderScheduledFrame(...)` | requests render-thread output production and reports success/failure to the backend | consume render-produced output without callback-owned GL |

@@ -133,10 +133,10 @@ Current implementation:

 - `RenderCommandQueue` exists as a pure C++ mailbox helper.
 - Preview present and screenshot capture requests use latest-value coalescing.
- Input upload requests use latest-value coalescing. During the compatibility phase the input frame memory is still drained immediately; a real render thread will need copied or otherwise owned frame storage.
+- Input upload requests use latest-value coalescing with owned frame bytes copied at enqueue time.
 - Output frame requests use FIFO semantics so scheduled output demand is not collapsed.
 - Render-local reset requests coalesce to the strongest pending reset scope.
- The synchronous compatibility shims submit queued work to the render thread and wait for completion once the render thread is running.
+- Output frame requests use synchronous request/response through the render thread as the remaining transitional playout bridge.

 Possible commands:

@@ -195,7 +195,7 @@ Render-thread-only methods should be private or clearly named:
 - `RenderEngine::RenderOutputFrameOnRenderThread(...)`
 - `RenderEngine::CaptureOutputFrameRgbaTopDownOnRenderThread(...)`

-The current `TryUploadInputFrame`, `RenderOutputFrame`, `TryPresentPreview`, and `CaptureOutputFrameRgbaTopDown` methods can remain as compatibility shims during migration, but their implementations should move toward enqueue-and-wait or enqueue-and-return behavior instead of binding GL directly from the caller's thread.
+The public runtime entrypoints now use queue/request language. `RequestOutputFrame(...)` remains synchronous so the existing DeckLink callback path can keep producing an output frame while Phase 7's producer/consumer playout queue is still future work.

 ## Frame Production Shape

@@ -294,7 +294,7 @@ Screenshot should become:
 - [x] queued render-thread capture request
 - [x] async disk write remains outside render thread

-Current implementation: `OpenGLComposite::RequestScreenshot(...)` builds the output path, queues `RenderEngine::RequestScreenshotCapture(...)`, and the render thread captures pixels before handing them to the existing async PNG writer. Preview presentation is a latest-value best-effort render command; non-render callers enqueue and return, while render-thread callers drain the latest preview command inline.
+Current implementation: `OpenGLComposite::RequestScreenshot(...)` builds the output path, queues `RenderEngine::RequestScreenshotCapture(...)`, and the render thread captures pixels before handing them to the existing async PNG writer. Preview presentation is a latest-value best-effort render command that is queued behind output render work, even when requested from the render pipeline.

 ### Step 7. Remove Shared GL Lock From Normal Paths

@@ -306,6 +306,17 @@ Once all GL entrypoints are render-thread-owned:

 Current implementation: `OpenGLComposite` no longer owns or passes a shared `CRITICAL_SECTION`, and `RenderEngine` no longer has caller-thread GL fallback paths for preview, input upload, output render, or screenshot capture. Runtime callers must go through the render thread; pre-start direct GL fallback is limited to startup initialization while the app explicitly owns the context.

+### Shutdown Order
+
+Current shutdown order is explicit in code:
+
+1. `OpenGLComposite::Stop()` stops runtime services so control/OSC work stops entering the runtime.
+2. `VideoBackend::Stop()` stops DeckLink streams/playout so input and output callbacks stop requesting render work.
+3. `RenderEngine::StopRenderThread()` destroys GL resources on the render thread, signals the render thread to stop, joins it, and unbinds the context on render-thread exit.
+4. `WM_DESTROY` deletes `OpenGLComposite`, unbinds the window context, and deletes the GL context.
+
+This order is build-tested, and `RenderCommandQueue` behavior is covered by non-GL unit tests. It would still benefit from a real-window/DeckLink shutdown smoke test, but the code path is explicit enough for Phase 4's design exit.
+
 ## Testing Strategy

 Phase 4 tests should avoid hardware where possible.
@@ -314,17 +325,17 @@ Recommended tests:

 - render command queue preserves FIFO for non-coalesced commands
 - latest-input mailbox drops older frames under load
- stop command wakes and drains the render thread
+- shutdown path stops backend callbacks before stopping and joining the render thread
 - screenshot request receives one completion or failure
- output render request reports timeout/failure if render thread is stopped
+- output render request reports failure if render thread is stopped
 - render reset commands coalesce where expected
- wrong-thread render-only methods are not publicly reachable
+- wrong-thread render-only diagnostics are present on private render-thread helpers

 Existing useful homes:

 - `RuntimeEventTypeTests` for new render/backend observations
 - `RuntimeSubsystemTests` for pure request/coalescing helpers
- a new `RenderThreadTests` target for queue/mailbox/lifecycle helpers that do not require GL
+- a future `RenderThreadTests` target if render-thread lifecycle is extracted behind a non-GL test seam

 Manual verification will still be needed for:

@@ -332,6 +343,7 @@ Manual verification will still be needed for:
 - preview interaction
 - screenshot capture
 - shader reload while rendering
+- real window/context shutdown

 ## Telemetry Added During Phase 4

@@ -378,22 +390,19 @@ Phase 4 can be considered complete once the project can say:
 - [x] input callbacks do not bind GL or wait on GL upload
 - [x] output callbacks do not bind GL directly
 - [x] preview and screenshot requests enter render through explicit render-thread requests
- [ ] `RenderFrameInput` / `RenderFrameState` remain the frame-state contract
+- [x] `RenderFrameInput` / `RenderFrameState` remain the frame-state contract
 - [x] normal frame production no longer depends on a shared GL `CRITICAL_SECTION`
- [ ] render-thread queue/mailbox behavior has non-GL tests
- [ ] shutdown order is explicit and tested or manually verified
+- [x] render-thread queue/mailbox behavior has non-GL tests
+- [x] shutdown order is explicit and tested or manually verified

 ## Open Questions

- Should the first output migration be synchronous request/response, or should Phase 4 go directly to a small ready-frame queue?
- Should the render thread own `RuntimeServiceLiveBridge` calls, or should frame state be prepared just before enqueue?
- How much input frame memory should be copied at enqueue time versus referenced from backend-owned buffers?
+- What exact producer/consumer output queue shape should replace the current synchronous output request in Phase 7?
 - Should preview present on the render thread, or should render publish a preview image/texture to a separate presenter?
- What timeout should output callbacks use if the render thread cannot produce a frame in time?
- Should wrong-thread GL access be enforced with assertions, telemetry, or both?
+- Should wrong-thread GL access eventually escalate from debug diagnostics to structured telemetry or assertions?

 ## Short Version

 Phase 4 should make GL ownership boring and deterministic.

-One render thread owns the context. Other threads submit work or consume results. Input upload, frame rendering, readback, preview, and screenshot capture all move behind render-thread entrypoints. The first implementation can be transitional and partly synchronous, but after Phase 4 the app should no longer rely on callback and UI paths borrowing the GL context under one shared lock.
+One render thread owns the context. Other threads submit work or consume results. Input upload, frame rendering, readback, preview, and screenshot capture all move behind render-thread entrypoints. Output production remains a synchronous request/response bridge for now, but the app no longer relies on callback and UI paths borrowing the GL context under one shared lock.
--- a/docs/subsystems/RenderEngine.md
+++ b/docs/subsystems/RenderEngine.md
@@ -49,7 +49,7 @@ That split is workable today, but it creates architectural pressure:
 - render-local transient state now has clearer Phase 3 boundaries, but GL ownership is still shared through callback and UI entrypoints.
 - it is difficult to test render behavior separately from app bootstrap and hardware integration.

-`RenderEngine` exists to absorb that responsibility into one subsystem with one direction of ownership.
+`RenderEngine` exists to absorb that responsibility into one subsystem with one direction of ownership. Phase 4 has completed the GL ownership part of this target: normal runtime GL work now enters through the `RenderEngine` render thread.

 ## Responsibilities

@@ -153,9 +153,9 @@ Those rules matter because the current codebase often solves timing issues by le

 ## GL Ownership Model

-## Target Rule
+## Current Rule

-One subsystem owns GL. In practice that should mean one render thread becomes the long-lived GL owner in a later phase.
+One subsystem owns GL. `RenderEngine` now starts a dedicated render thread, binds the existing GL context on that thread for normal runtime work, and routes input upload, output render, preview presentation, screenshot capture, shader application, and render-local reset work through render-thread requests.

 The render thread should:

@@ -168,20 +168,20 @@ The render thread should:

 Other threads should interact with the subsystem through queues, snapshots, and completion signals, not by borrowing the GL context.

-## Current State
+## Remaining Timing State

-Today GL work is still shared across callback-driven and UI entrypoints:
+GL ownership is no longer shared across callback-driven and UI entrypoints. The current paths into render work are:

 - input upload is requested through [OpenGLVideoIOBridge::UploadInputFrame()](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/pipeline/OpenGLVideoIOBridge.cpp:11)
 - playout-triggered render is requested through [OpenGLVideoIOBridge::RenderScheduledFrame()](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/pipeline/OpenGLVideoIOBridge.cpp:18)
 - render-pass execution occurs in [OpenGLRenderPipeline::RenderFrame()](/c:/Users/Aiden/Documents/GitHub/video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/pipeline/OpenGLRenderPipeline.cpp:31)
- preview and screenshot paths still enter `RenderEngine` methods that bind the shared context
+- preview and screenshot paths enter `RenderEngine` queue/request methods

-The `CRITICAL_SECTION` protects correctness, but it is not the target architectural model.
+The remaining timing issue is not shared GL ownership; it is the transitional synchronous output request/response path. The DeckLink completion callback still waits while the render thread produces an output frame and the DeckLink buffer is filled, and only then schedules the next frame.

 ## Migration Direction

-Phase 1 should treat the current bridge lock as a temporary compatibility mechanism. The target path should be:
+The next target path should be:

 1. input callback enqueues frame payloads or references
 2. render thread accepts the latest usable input frame
@@ -189,7 +189,7 @@ Phase 1 should treat the current bridge lock as a temporary compatibility mechan
 4. render thread produces completed output frames ahead of backend demand
 5. backend callbacks only dequeue and schedule pre-rendered frames

-That removes the need for callback threads to ever own GL.
+Phase 4 completed the part that removes callback-thread GL ownership. Phase 7 should complete the producer/consumer playout part.

 ## Render Loop Boundaries

@@ -407,7 +407,7 @@ inside render-owned code paths instead of putting them back into runtime storage

 Introduce snapshot-facing APIs so render no longer depends on broad runtime-state access for frame production.

-Current status: Phase 3 introduced `RenderFrameInput`, `RenderFrameState`, and `RenderFrameStateResolver`, so frame-state selection is named and no longer lives inside GL drawing. Phase 4 can build on that contract while moving GL ownership.
+Current status: Phase 3 introduced `RenderFrameInput`, `RenderFrameState`, and `RenderFrameStateResolver`, so frame-state selection is named and no longer lives inside GL drawing. Phase 4 built on that contract and moved normal runtime GL ownership onto the render thread.

 ### Step 4. Move Uploads Onto Render Ownership