Files
video-shader-toys/apps/LoopThroughWithOpenGLCompositing/gl/pipeline/OpenGLRenderPipeline.cpp
Aiden 93d856b3b6
Some checks failed
CI / React UI Build (push) Successful in 37s
CI / Windows Release Package (push) Has been cancelled
CI / Native Windows Build And Tests (push) Has been cancelled
CPU optimisations
2026-05-09 13:50:27 +10:00

280 lines
9.5 KiB
C++

#include "OpenGLRenderPipeline.h"
#include "OpenGLRenderer.h"
#include "RuntimeHost.h"
#include "VideoIOFormat.h"
#include <cstring>
#include <chrono>
#include <gl/gl.h>
OpenGLRenderPipeline::OpenGLRenderPipeline(
OpenGLRenderer& renderer,
RuntimeHost& runtimeHost,
RenderEffectCallback renderEffect,
OutputReadyCallback outputReady,
PaintCallback paint) :
mRenderer(renderer),
mRuntimeHost(runtimeHost),
mRenderEffect(renderEffect),
mOutputReady(outputReady),
mPaint(paint)
{
}
OpenGLRenderPipeline::~OpenGLRenderPipeline()
{
ResetAsyncReadbackState();
}
bool OpenGLRenderPipeline::RenderFrame(const RenderPipelineFrameContext& context, VideoIOOutputFrame& outputFrame)
{
const VideoIOState& state = context.videoState;
const auto renderStartTime = std::chrono::steady_clock::now();
glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.CompositeFramebuffer());
mRenderEffect();
glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.CompositeFramebuffer());
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, mRenderer.OutputFramebuffer());
glBlitFramebuffer(0, 0, state.inputFrameSize.width, state.inputFrameSize.height, 0, 0, state.outputFrameSize.width, state.outputFrameSize.height, GL_COLOR_BUFFER_BIT, GL_LINEAR);
glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.OutputFramebuffer());
if (mOutputReady)
mOutputReady();
if (state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10)
PackOutputFor10Bit(state);
glFlush();
const auto renderEndTime = std::chrono::steady_clock::now();
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(renderEndTime - renderStartTime).count();
mRuntimeHost.TrySetPerformanceStats(state.frameBudgetMilliseconds, renderMilliseconds);
mRuntimeHost.TryAdvanceFrame();
ReadOutputFrame(state, outputFrame);
if (mPaint)
mPaint();
return true;
}
void OpenGLRenderPipeline::PackOutputFor10Bit(const VideoIOState& state)
{
glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.OutputPackFramebuffer());
glViewport(0, 0, state.outputPackTextureWidth, state.outputFrameSize.height);
glDisable(GL_SCISSOR_TEST);
glDisable(GL_BLEND);
glDisable(GL_DEPTH_TEST);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, mRenderer.OutputTexture());
glBindVertexArray(mRenderer.FullscreenVertexArray());
glUseProgram(mRenderer.OutputPackProgram());
const GLint outputResolutionLocation = mRenderer.OutputPackResolutionLocation();
const GLint activeWordsLocation = mRenderer.OutputPackActiveWordsLocation();
const GLint packFormatLocation = mRenderer.OutputPackFormatLocation();
if (outputResolutionLocation >= 0)
glUniform2f(outputResolutionLocation, static_cast<float>(state.outputFrameSize.width), static_cast<float>(state.outputFrameSize.height));
if (activeWordsLocation >= 0)
glUniform1f(activeWordsLocation, static_cast<float>(ActiveV210WordsForWidth(state.outputFrameSize.width)));
if (packFormatLocation >= 0)
glUniform1i(packFormatLocation, state.outputPixelFormat == VideoIOPixelFormat::Yuva10 ? 2 : 1);
glDrawArrays(GL_TRIANGLES, 0, 3);
glUseProgram(0);
glBindVertexArray(0);
glBindTexture(GL_TEXTURE_2D, 0);
}
bool OpenGLRenderPipeline::EnsureAsyncReadbackBuffers(std::size_t requiredBytes)
{
if (requiredBytes == 0)
return false;
if (mAsyncReadbackBytes == requiredBytes && mAsyncReadbackSlots[0].pixelPackBuffer != 0)
return true;
ResetAsyncReadbackState();
mAsyncReadbackBytes = requiredBytes;
for (AsyncReadbackSlot& slot : mAsyncReadbackSlots)
{
glGenBuffers(1, &slot.pixelPackBuffer);
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(requiredBytes), nullptr, GL_STREAM_READ);
slot.sizeBytes = requiredBytes;
slot.inFlight = false;
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
mAsyncReadbackWriteIndex = 0;
mAsyncReadbackReadIndex = 0;
return true;
}
void OpenGLRenderPipeline::ResetAsyncReadbackState()
{
FlushAsyncReadbackPipeline();
for (AsyncReadbackSlot& slot : mAsyncReadbackSlots)
slot.sizeBytes = 0;
if (mAsyncReadbackSlots[0].pixelPackBuffer != 0)
{
for (AsyncReadbackSlot& slot : mAsyncReadbackSlots)
{
if (slot.pixelPackBuffer != 0)
{
glDeleteBuffers(1, &slot.pixelPackBuffer);
slot.pixelPackBuffer = 0;
}
}
}
mAsyncReadbackWriteIndex = 0;
mAsyncReadbackReadIndex = 0;
mAsyncReadbackBytes = 0;
}
void OpenGLRenderPipeline::FlushAsyncReadbackPipeline()
{
for (AsyncReadbackSlot& slot : mAsyncReadbackSlots)
{
if (slot.fence != nullptr)
{
glDeleteSync(slot.fence);
slot.fence = nullptr;
}
slot.inFlight = false;
}
mAsyncReadbackWriteIndex = 0;
mAsyncReadbackReadIndex = 0;
}
void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state)
{
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
const std::size_t requiredBytes = static_cast<std::size_t>(state.outputFrameRowBytes) * state.outputFrameSize.height;
const GLenum format = usePackedOutput ? GL_RGBA : GL_BGRA;
const GLenum type = usePackedOutput ? GL_UNSIGNED_BYTE : GL_UNSIGNED_INT_8_8_8_8_REV;
const GLuint framebuffer = usePackedOutput ? mRenderer.OutputPackFramebuffer() : mRenderer.OutputFramebuffer();
const GLsizei readWidth = static_cast<GLsizei>(usePackedOutput ? state.outputPackTextureWidth : state.outputFrameSize.width);
const GLsizei readHeight = static_cast<GLsizei>(state.outputFrameSize.height);
if (requiredBytes == 0)
return;
if (mAsyncReadbackBytes != requiredBytes
|| mAsyncReadbackFormat != format
|| mAsyncReadbackType != type
|| mAsyncReadbackFramebuffer != framebuffer)
{
mAsyncReadbackFormat = format;
mAsyncReadbackType = type;
mAsyncReadbackFramebuffer = framebuffer;
if (!EnsureAsyncReadbackBuffers(requiredBytes))
return;
}
AsyncReadbackSlot& slot = mAsyncReadbackSlots[mAsyncReadbackWriteIndex];
if (slot.fence != nullptr)
{
glDeleteSync(slot.fence);
slot.fence = nullptr;
}
glPixelStorei(GL_PACK_ALIGNMENT, 4);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(requiredBytes), nullptr, GL_STREAM_READ);
glReadPixels(0, 0, readWidth, readHeight, format, type, nullptr);
slot.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
slot.inFlight = slot.fence != nullptr;
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
mAsyncReadbackWriteIndex = (mAsyncReadbackWriteIndex + 1) % mAsyncReadbackSlots.size();
}
bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds)
{
if (mAsyncReadbackBytes == 0 || outputFrame.bytes == nullptr)
return false;
AsyncReadbackSlot& slot = mAsyncReadbackSlots[mAsyncReadbackReadIndex];
if (!slot.inFlight || slot.fence == nullptr || slot.pixelPackBuffer == 0)
return false;
const GLenum waitFlags = timeoutNanoseconds > 0 ? GL_SYNC_FLUSH_COMMANDS_BIT : 0;
const GLenum waitResult = glClientWaitSync(slot.fence, waitFlags, timeoutNanoseconds);
if (waitResult != GL_ALREADY_SIGNALED && waitResult != GL_CONDITION_SATISFIED)
return false;
glDeleteSync(slot.fence);
slot.fence = nullptr;
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
void* mappedBytes = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
if (mappedBytes == nullptr)
{
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
slot.inFlight = false;
mAsyncReadbackReadIndex = (mAsyncReadbackReadIndex + 1) % mAsyncReadbackSlots.size();
return false;
}
std::memcpy(outputFrame.bytes, mappedBytes, slot.sizeBytes);
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
slot.inFlight = false;
mAsyncReadbackReadIndex = (mAsyncReadbackReadIndex + 1) % mAsyncReadbackSlots.size();
CacheOutputFrame(outputFrame);
return true;
}
void OpenGLRenderPipeline::CacheOutputFrame(const VideoIOOutputFrame& outputFrame)
{
if (outputFrame.bytes == nullptr || outputFrame.height == 0 || outputFrame.rowBytes <= 0)
return;
const std::size_t byteCount = static_cast<std::size_t>(outputFrame.rowBytes) * outputFrame.height;
mCachedOutputFrame.resize(byteCount);
std::memcpy(mCachedOutputFrame.data(), outputFrame.bytes, byteCount);
}
void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes)
{
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
glPixelStorei(GL_PACK_ALIGNMENT, 4);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
if (usePackedOutput)
{
glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputPackFramebuffer());
glReadPixels(0, 0, state.outputPackTextureWidth, state.outputFrameSize.height, GL_RGBA, GL_UNSIGNED_BYTE, destinationBytes);
}
else
{
glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputFramebuffer());
glReadPixels(0, 0, state.outputFrameSize.width, state.outputFrameSize.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, destinationBytes);
}
}
void OpenGLRenderPipeline::ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame)
{
if (TryConsumeAsyncReadback(outputFrame, 500000))
{
QueueAsyncReadback(state);
return;
}
// If async readback misses the playout deadline, prefer a fresh synchronous
// frame over reusing stale cached output, then restart the async pipeline.
if (outputFrame.bytes != nullptr)
{
ReadOutputFrameSynchronously(state, outputFrame.bytes);
CacheOutputFrame(outputFrame);
}
FlushAsyncReadbackPipeline();
QueueAsyncReadback(state);
}