#include "OpenGLRenderPipeline.h"
#include "OpenGLRenderer.h"
#include "RuntimeHost.h"
#include "VideoIOFormat.h"

#include <chrono>
#include <cstddef>
#include <cstring>

/// Stores references to the renderer and runtime host plus the three
/// callbacks invoked by RenderFrame. No GL work happens here.
OpenGLRenderPipeline::OpenGLRenderPipeline(
    OpenGLRenderer& renderer,
    RuntimeHost& runtimeHost,
    RenderEffectCallback renderEffect,
    OutputReadyCallback outputReady,
    PaintCallback paint) :
    mRenderer(renderer),
    mRuntimeHost(runtimeHost),
    mRenderEffect(renderEffect),
    mOutputReady(outputReady),
    mPaint(paint)
{
}

/// Releases the fences and pixel-pack buffers owned by the async readback
/// ring. NOTE(review): this issues GL calls, so it presumably needs the GL
/// context to be current at destruction time - confirm against the owner.
OpenGLRenderPipeline::~OpenGLRenderPipeline()
{
    ResetAsyncReadbackState();
}

/// Renders one frame and delivers its pixels into `outputFrame`.
///
/// Sequence: run the render-effect callback into the composite framebuffer,
/// blit composite -> output (input size to output size, linear filter),
/// notify `mOutputReady`, run the 10-bit pack pass for V210/Yuva10 output,
/// flush, report timing to the runtime host, read the frame back, and finally
/// invoke `mPaint`.
///
/// @param context     Per-frame context; only `videoState` is read here.
/// @param outputFrame Destination frame filled by ReadOutputFrame.
/// @return Always true.
bool OpenGLRenderPipeline::RenderFrame(const RenderPipelineFrameContext& context, VideoIOOutputFrame& outputFrame)
{
    const VideoIOState& state = context.videoState;

    const auto renderStartTime = std::chrono::steady_clock::now();

    glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.CompositeFramebuffer());
    mRenderEffect();

    glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.CompositeFramebuffer());
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, mRenderer.OutputFramebuffer());
    glBlitFramebuffer(
        0, 0, state.inputFrameSize.width, state.inputFrameSize.height,
        0, 0, state.outputFrameSize.width, state.outputFrameSize.height,
        GL_COLOR_BUFFER_BIT, GL_LINEAR);
    glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.OutputFramebuffer());

    if (mOutputReady)
        mOutputReady();

    if (state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10)
        PackOutputFor10Bit(state);

    glFlush();

    // The measured interval covers command submission up to glFlush; the
    // readback below is deliberately outside it.
    const auto renderEndTime = std::chrono::steady_clock::now();
    const double renderMilliseconds =
        std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(renderEndTime - renderStartTime).count();
    mRuntimeHost.TrySetPerformanceStats(state.frameBudgetMilliseconds, renderMilliseconds);
    mRuntimeHost.TryAdvanceFrame();

    ReadOutputFrame(state, outputFrame);

    if (mPaint)
        mPaint();

    return true;
}

/// Draws the output texture through the renderer's output-pack program into
/// the output-pack framebuffer.
///
/// The viewport is `outputPackTextureWidth` x output height. Uniforms are only
/// uploaded when their location is valid (>= 0). The pack-format uniform is 2
/// for Yuva10 and 1 for every other format.
void OpenGLRenderPipeline::PackOutputFor10Bit(const VideoIOState& state)
{
    glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.OutputPackFramebuffer());
    glViewport(0, 0, state.outputPackTextureWidth, state.outputFrameSize.height);
    glDisable(GL_SCISSOR_TEST);
    glDisable(GL_BLEND);
    glDisable(GL_DEPTH_TEST);

    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, mRenderer.OutputTexture());
    glBindVertexArray(mRenderer.FullscreenVertexArray());
    glUseProgram(mRenderer.OutputPackProgram());

    const GLint outputResolutionLocation = mRenderer.OutputPackResolutionLocation();
    const GLint activeWordsLocation = mRenderer.OutputPackActiveWordsLocation();
    const GLint packFormatLocation = mRenderer.OutputPackFormatLocation();

    if (outputResolutionLocation >= 0)
    {
        glUniform2f(
            outputResolutionLocation,
            static_cast<float>(state.outputFrameSize.width),
            static_cast<float>(state.outputFrameSize.height));
    }
    if (activeWordsLocation >= 0)
        glUniform1f(activeWordsLocation, static_cast<float>(ActiveV210WordsForWidth(state.outputFrameSize.width)));
    if (packFormatLocation >= 0)
        glUniform1i(packFormatLocation, state.outputPixelFormat == VideoIOPixelFormat::Yuva10 ? 2 : 1);

    // Three vertices, no index buffer: a single triangle from the fullscreen VAO.
    glDrawArrays(GL_TRIANGLES, 0, 3);

    glUseProgram(0);
    glBindVertexArray(0);
    glBindTexture(GL_TEXTURE_2D, 0);
}

/// Makes sure every readback slot owns a pixel-pack buffer of `requiredBytes`.
///
/// If the ring is already sized for `requiredBytes` (and slot 0 has a buffer)
/// nothing is touched. Otherwise the whole ring is torn down and rebuilt, which
/// also discards any in-flight readbacks.
///
/// @return false when `requiredBytes` is zero, true otherwise.
bool OpenGLRenderPipeline::EnsureAsyncReadbackBuffers(std::size_t requiredBytes)
{
    if (requiredBytes == 0)
        return false;

    if (mAsyncReadbackBytes == requiredBytes && mAsyncReadbackSlots[0].pixelPackBuffer != 0)
        return true;

    ResetAsyncReadbackState();
    mAsyncReadbackBytes = requiredBytes;

    for (AsyncReadbackSlot& slot : mAsyncReadbackSlots)
    {
        glGenBuffers(1, &slot.pixelPackBuffer);
        glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
        glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(requiredBytes), nullptr, GL_STREAM_READ);
        slot.sizeBytes = requiredBytes;
        slot.inFlight = false;
    }
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

    mAsyncReadbackWriteIndex = 0;
    mAsyncReadbackReadIndex = 0;
    return true;
}

/// Drops all pending readbacks and frees every slot's pixel-pack buffer,
/// leaving the ring empty (`mAsyncReadbackBytes == 0`).
void OpenGLRenderPipeline::ResetAsyncReadbackState()
{
    FlushAsyncReadbackPipeline();

    // Each slot is checked on its own rather than gating the whole loop on
    // slot 0, so a partially populated ring cannot leak buffers.
    for (AsyncReadbackSlot& slot : mAsyncReadbackSlots)
    {
        slot.sizeBytes = 0;
        if (slot.pixelPackBuffer != 0)
        {
            glDeleteBuffers(1, &slot.pixelPackBuffer);
            slot.pixelPackBuffer = 0;
        }
    }

    mAsyncReadbackWriteIndex = 0;
    mAsyncReadbackReadIndex = 0;
    mAsyncReadbackBytes = 0;
}

/// Abandons every pending readback: deletes the fences, clears the in-flight
/// flags and rewinds both ring indices. Buffers are kept for reuse.
void OpenGLRenderPipeline::FlushAsyncReadbackPipeline()
{
    for (AsyncReadbackSlot& slot : mAsyncReadbackSlots)
    {
        if (slot.fence != nullptr)
        {
            glDeleteSync(slot.fence);
            slot.fence = nullptr;
        }
        slot.inFlight = false;
    }

    mAsyncReadbackWriteIndex = 0;
    mAsyncReadbackReadIndex = 0;
}

/// Starts a non-blocking readback of the current output into the slot at the
/// write index and advances that index.
///
/// V210/Yuva10 output is read from the pack framebuffer as RGBA/UNSIGNED_BYTE
/// at `outputPackTextureWidth`; every other format is read from the output
/// framebuffer as BGRA/UNSIGNED_INT_8_8_8_8_REV at the output width. Does
/// nothing when the computed frame size is zero.
void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state)
{
    const bool usePackedOutput =
        state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
    const std::size_t requiredBytes = static_cast<std::size_t>(state.outputFrameRowBytes) * state.outputFrameSize.height;
    const GLenum format = usePackedOutput ? GL_RGBA : GL_BGRA;
    const GLenum type = usePackedOutput ? GL_UNSIGNED_BYTE : GL_UNSIGNED_INT_8_8_8_8_REV;
    const GLuint framebuffer = usePackedOutput ? mRenderer.OutputPackFramebuffer() : mRenderer.OutputFramebuffer();
    const GLsizei readWidth = static_cast<GLsizei>(usePackedOutput ? state.outputPackTextureWidth : state.outputFrameSize.width);
    const GLsizei readHeight = static_cast<GLsizei>(state.outputFrameSize.height);

    if (requiredBytes == 0)
        return;

    if (mAsyncReadbackBytes != requiredBytes
        || mAsyncReadbackFormat != format
        || mAsyncReadbackType != type
        || mAsyncReadbackFramebuffer != framebuffer)
    {
        mAsyncReadbackFormat = format;
        mAsyncReadbackType = type;
        mAsyncReadbackFramebuffer = framebuffer;
        if (!EnsureAsyncReadbackBuffers(requiredBytes))
            return;
    }

    AsyncReadbackSlot& slot = mAsyncReadbackSlots[mAsyncReadbackWriteIndex];
    if (slot.fence != nullptr)
    {
        // The slot is being reused before it was consumed; drop the old fence.
        glDeleteSync(slot.fence);
        slot.fence = nullptr;
    }

    glPixelStorei(GL_PACK_ALIGNMENT, 4);
    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
    glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
    glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
    // Re-specify the data store before each read (presumably to orphan the
    // previous contents so the driver need not stall on them).
    glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(requiredBytes), nullptr, GL_STREAM_READ);
    // With a pixel-pack buffer bound the last argument is an offset into it.
    glReadPixels(0, 0, readWidth, readHeight, format, type, nullptr);
    slot.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
    slot.inFlight = slot.fence != nullptr;
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

    mAsyncReadbackWriteIndex = (mAsyncReadbackWriteIndex + 1) % mAsyncReadbackSlots.size();
}

/// Tries to copy the oldest queued readback into `outputFrame`.
///
/// Waits up to `timeoutNanoseconds` on the slot's fence. Once the fence has
/// signalled the slot is retired (fence deleted, read index advanced) whether
/// or not the copy succeeds.
///
/// @param outputFrame        Destination; `bytes` must be non-null.
/// @param timeoutNanoseconds Maximum fence wait; 0 polls without flushing.
/// @return true if a frame was copied and cached; false if nothing was ready,
///         the slot did not fit the destination frame, or mapping/unmapping
///         the buffer failed.
bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds)
{
    if (mAsyncReadbackBytes == 0 || outputFrame.bytes == nullptr)
        return false;

    AsyncReadbackSlot& slot = mAsyncReadbackSlots[mAsyncReadbackReadIndex];
    if (!slot.inFlight || slot.fence == nullptr || slot.pixelPackBuffer == 0)
        return false;

    const GLbitfield waitFlags = timeoutNanoseconds > 0 ? GL_SYNC_FLUSH_COMMANDS_BIT : 0;
    const GLenum waitResult = glClientWaitSync(slot.fence, waitFlags, timeoutNanoseconds);
    if (waitResult != GL_ALREADY_SIGNALED && waitResult != GL_CONDITION_SATISFIED)
        return false;

    // From here on the slot is finished regardless of the outcome.
    glDeleteSync(slot.fence);
    slot.fence = nullptr;
    slot.inFlight = false;
    mAsyncReadbackReadIndex = (mAsyncReadbackReadIndex + 1) % mAsyncReadbackSlots.size();

    // The slot was sized for the state at queue time. If the destination frame
    // is now smaller (rowBytes * height is the extent CacheOutputFrame treats
    // as valid), copying slot.sizeBytes would write past it - discard instead.
    const std::size_t frameBytes = outputFrame.rowBytes > 0
        ? static_cast<std::size_t>(outputFrame.rowBytes) * outputFrame.height
        : 0;
    if (frameBytes < slot.sizeBytes)
        return false;

    glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
    void* mappedBytes = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
    if (mappedBytes == nullptr)
    {
        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
        return false;
    }

    std::memcpy(outputFrame.bytes, mappedBytes, slot.sizeBytes);
    // glUnmapBuffer returns GL_FALSE when the data store was corrupted while
    // mapped; in that case the bytes just copied cannot be trusted.
    const GLboolean unmapSucceeded = glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
    if (unmapSucceeded != GL_TRUE)
        return false;

    CacheOutputFrame(outputFrame);
    return true;
}

/// Copies `rowBytes * height` bytes of `outputFrame` into `mCachedOutputFrame`.
/// Frames with a null buffer, zero height or non-positive row stride are ignored.
void OpenGLRenderPipeline::CacheOutputFrame(const VideoIOOutputFrame& outputFrame)
{
    if (outputFrame.bytes == nullptr || outputFrame.height == 0 || outputFrame.rowBytes <= 0)
        return;

    const std::size_t byteCount = static_cast<std::size_t>(outputFrame.rowBytes) * outputFrame.height;
    mCachedOutputFrame.resize(byteCount);
    std::memcpy(mCachedOutputFrame.data(), outputFrame.bytes, byteCount);
}

/// Blocking glReadPixels of the current output straight into client memory.
///
/// Uses the same framebuffer/format/type selection as QueueAsyncReadback.
/// `destinationBytes` must be large enough for the read; no size is checked
/// here.
void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes)
{
    const bool usePackedOutput =
        state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;

    // With a pixel-pack buffer bound, glReadPixels would interpret
    // destinationBytes as an offset into that buffer instead of a pointer.
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

    glPixelStorei(GL_PACK_ALIGNMENT, 4);
    glPixelStorei(GL_PACK_ROW_LENGTH, 0);

    if (usePackedOutput)
    {
        glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputPackFramebuffer());
        glReadPixels(0, 0, state.outputPackTextureWidth, state.outputFrameSize.height, GL_RGBA, GL_UNSIGNED_BYTE, destinationBytes);
    }
    else
    {
        glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputFramebuffer());
        glReadPixels(0, 0, state.outputFrameSize.width, state.outputFrameSize.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, destinationBytes);
    }
}

/// Fills `outputFrame` for the current frame and keeps the async ring primed.
///
/// Fast path: a previously queued readback is ready within the wait budget, so
/// it is delivered and a new readback is queued. Note that this path delivers
/// the readback queued on an earlier call, not the frame just rendered.
void OpenGLRenderPipeline::ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame)
{
    // Fence wait budget for the fast path: 500000 ns = 0.5 ms.
    constexpr GLuint64 kAsyncWaitNanoseconds = 500000;

    if (TryConsumeAsyncReadback(outputFrame, kAsyncWaitNanoseconds))
    {
        QueueAsyncReadback(state);
        return;
    }

    // If async readback misses the playout deadline, prefer a fresh synchronous
    // frame over reusing stale cached output, then restart the async pipeline.
    if (outputFrame.bytes != nullptr)
    {
        ReadOutputFrameSynchronously(state, outputFrame.bytes);
        CacheOutputFrame(outputFrame);
    }

    FlushAsyncReadbackPipeline();
    QueueAsyncReadback(state);
}