CPU optimisations
This commit is contained in:
@@ -531,7 +531,7 @@ LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
|
||||
if (!sInteractiveResize && pOpenGLComposite)
|
||||
{
|
||||
wglMakeCurrent(hDC, hRC);
|
||||
pOpenGLComposite->paintGL();
|
||||
pOpenGLComposite->paintGL(true);
|
||||
wglMakeCurrent( NULL, NULL );
|
||||
RaiseStatusControls(sStatusStrip);
|
||||
}
|
||||
|
||||
@@ -501,6 +501,9 @@ bool ControlServer::SendWebSocketText(SOCKET clientSocket, const std::string& pa
|
||||
|
||||
void ControlServer::BroadcastStateLocked()
|
||||
{
|
||||
if (mClients.empty())
|
||||
return;
|
||||
|
||||
const std::string stateMessage = mCallbacks.getStateJson ? mCallbacks.getStateJson() : "{}";
|
||||
for (auto it = mClients.begin(); it != mClients.end();)
|
||||
{
|
||||
|
||||
@@ -35,7 +35,7 @@ OpenGLComposite::OpenGLComposite(HWND hWnd, HDC hDC, HGLRC hRC) :
|
||||
*mRuntimeHost,
|
||||
[this]() { renderEffect(); },
|
||||
[this]() { ProcessScreenshotRequest(); },
|
||||
[this]() { paintGL(); });
|
||||
[this]() { paintGL(false); });
|
||||
mVideoIOBridge = std::make_unique<OpenGLVideoIOBridge>(
|
||||
*mVideoIO,
|
||||
*mRenderer,
|
||||
@@ -156,8 +156,26 @@ error:
|
||||
return false;
|
||||
}
|
||||
|
||||
void OpenGLComposite::paintGL()
|
||||
void OpenGLComposite::paintGL(bool force)
|
||||
{
|
||||
if (!force)
|
||||
{
|
||||
if (IsIconic(hGLWnd))
|
||||
return;
|
||||
|
||||
const unsigned previewFps = mRuntimeHost ? mRuntimeHost->GetPreviewFps() : 30u;
|
||||
if (previewFps == 0)
|
||||
return;
|
||||
|
||||
const auto now = std::chrono::steady_clock::now();
|
||||
const auto minimumInterval = std::chrono::microseconds(1000000 / (previewFps == 0 ? 1u : previewFps));
|
||||
if (mLastPreviewPresentTime != std::chrono::steady_clock::time_point() &&
|
||||
now - mLastPreviewPresentTime < minimumInterval)
|
||||
{
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!TryEnterCriticalSection(&pMutex))
|
||||
{
|
||||
ValidateRect(hGLWnd, NULL);
|
||||
@@ -165,6 +183,7 @@ void OpenGLComposite::paintGL()
|
||||
}
|
||||
|
||||
mRenderer->PresentToWindow(hGLDC, mVideoIO->OutputFrameWidth(), mVideoIO->OutputFrameHeight());
|
||||
mLastPreviewPresentTime = std::chrono::steady_clock::now();
|
||||
ValidateRect(hGLWnd, NULL);
|
||||
LeaveCriticalSection(&pMutex);
|
||||
}
|
||||
@@ -314,15 +333,35 @@ void OpenGLComposite::renderEffect()
|
||||
}
|
||||
else if (mRuntimeHost)
|
||||
{
|
||||
if (mRuntimeHost->TryGetLayerRenderStates(mVideoIO->InputFrameWidth(), mVideoIO->InputFrameHeight(), layerStates))
|
||||
{
|
||||
mCachedLayerRenderStates = layerStates;
|
||||
}
|
||||
else
|
||||
const unsigned renderWidth = mVideoIO->InputFrameWidth();
|
||||
const unsigned renderHeight = mVideoIO->InputFrameHeight();
|
||||
const uint64_t renderStateVersion = mRuntimeHost->GetRenderStateVersion();
|
||||
const bool renderStateCacheValid =
|
||||
!mCachedLayerRenderStates.empty() &&
|
||||
mCachedRenderStateVersion == renderStateVersion &&
|
||||
mCachedRenderStateWidth == renderWidth &&
|
||||
mCachedRenderStateHeight == renderHeight;
|
||||
|
||||
if (renderStateCacheValid)
|
||||
{
|
||||
layerStates = mCachedLayerRenderStates;
|
||||
mRuntimeHost->RefreshDynamicRenderStateFields(layerStates);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mRuntimeHost->TryGetLayerRenderStates(renderWidth, renderHeight, layerStates))
|
||||
{
|
||||
mCachedLayerRenderStates = layerStates;
|
||||
mCachedRenderStateVersion = renderStateVersion;
|
||||
mCachedRenderStateWidth = renderWidth;
|
||||
mCachedRenderStateHeight = renderHeight;
|
||||
}
|
||||
else
|
||||
{
|
||||
layerStates = mCachedLayerRenderStates;
|
||||
mRuntimeHost->RefreshDynamicRenderStateFields(layerStates);
|
||||
}
|
||||
}
|
||||
}
|
||||
const unsigned historyCap = mRuntimeHost ? mRuntimeHost->GetMaxTemporalHistoryFrames() : 0;
|
||||
mRenderPass->Render(
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <deque>
|
||||
#include <chrono>
|
||||
|
||||
class VideoIODevice;
|
||||
class OpenGLVideoIOBridge;
|
||||
@@ -64,7 +65,7 @@ public:
|
||||
std::string GetOscAddress() const;
|
||||
|
||||
void resizeGL(WORD width, WORD height);
|
||||
void paintGL();
|
||||
void paintGL(bool force = false);
|
||||
|
||||
private:
|
||||
void resizeWindow(int width, int height);
|
||||
@@ -87,8 +88,12 @@ private:
|
||||
std::unique_ptr<ShaderBuildQueue> mShaderBuildQueue;
|
||||
std::unique_ptr<RuntimeServices> mRuntimeServices;
|
||||
std::vector<RuntimeRenderState> mCachedLayerRenderStates;
|
||||
uint64_t mCachedRenderStateVersion = 0;
|
||||
unsigned mCachedRenderStateWidth = 0;
|
||||
unsigned mCachedRenderStateHeight = 0;
|
||||
std::atomic<bool> mUseCommittedLayerStates;
|
||||
std::atomic<bool> mScreenshotRequested;
|
||||
std::chrono::steady_clock::time_point mLastPreviewPresentTime;
|
||||
|
||||
bool InitOpenGLState();
|
||||
void renderEffect();
|
||||
|
||||
@@ -45,7 +45,7 @@ void OpenGLRenderPass::Render(
|
||||
}
|
||||
else
|
||||
{
|
||||
const std::vector<RenderPassDescriptor> passes = BuildLayerPassDescriptors(layerStates, layerPrograms);
|
||||
const std::vector<RenderPassDescriptor>& passes = BuildLayerPassDescriptors(layerStates, layerPrograms);
|
||||
for (const RenderPassDescriptor& pass : passes)
|
||||
{
|
||||
RenderLayerPass(
|
||||
@@ -71,9 +71,9 @@ void OpenGLRenderPass::RenderDecodePass(unsigned inputFrameWidth, unsigned input
|
||||
glBindVertexArray(mRenderer.FullscreenVertexArray());
|
||||
glUseProgram(mRenderer.DecodeProgram());
|
||||
|
||||
const GLint packedResolutionLocation = glGetUniformLocation(mRenderer.DecodeProgram(), "uPackedVideoResolution");
|
||||
const GLint decodedResolutionLocation = glGetUniformLocation(mRenderer.DecodeProgram(), "uDecodedVideoResolution");
|
||||
const GLint inputPixelFormatLocation = glGetUniformLocation(mRenderer.DecodeProgram(), "uInputPixelFormat");
|
||||
const GLint packedResolutionLocation = mRenderer.DecodePackedResolutionLocation();
|
||||
const GLint decodedResolutionLocation = mRenderer.DecodeDecodedResolutionLocation();
|
||||
const GLint inputPixelFormatLocation = mRenderer.DecodeInputPixelFormatLocation();
|
||||
if (packedResolutionLocation >= 0)
|
||||
glUniform2f(packedResolutionLocation, static_cast<float>(captureTextureWidth), static_cast<float>(inputFrameHeight));
|
||||
if (decodedResolutionLocation >= 0)
|
||||
@@ -96,7 +96,8 @@ std::vector<RenderPassDescriptor> OpenGLRenderPass::BuildLayerPassDescriptors(
|
||||
// Flatten the layer stack into concrete GL passes. A layer may now contain
|
||||
// several shader passes, but the outer stack still sees one visible output
|
||||
// per layer.
|
||||
std::vector<RenderPassDescriptor> passes;
|
||||
std::vector<RenderPassDescriptor>& passes = mPassScratch;
|
||||
passes.clear();
|
||||
const std::size_t passCount = layerStates.size() < layerPrograms.size() ? layerStates.size() : layerPrograms.size();
|
||||
std::size_t descriptorCount = 0;
|
||||
for (std::size_t index = 0; index < passCount; ++index)
|
||||
|
||||
@@ -56,4 +56,5 @@ private:
|
||||
|
||||
OpenGLRenderer& mRenderer;
|
||||
ShaderTextureBindings mTextureBindings;
|
||||
mutable std::vector<RenderPassDescriptor> mPassScratch;
|
||||
};
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
#include "RuntimeHost.h"
|
||||
#include "VideoIOFormat.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include <chrono>
|
||||
#include <gl/gl.h>
|
||||
|
||||
@@ -21,6 +23,11 @@ OpenGLRenderPipeline::OpenGLRenderPipeline(
|
||||
{
|
||||
}
|
||||
|
||||
OpenGLRenderPipeline::~OpenGLRenderPipeline()
|
||||
{
|
||||
ResetAsyncReadbackState();
|
||||
}
|
||||
|
||||
bool OpenGLRenderPipeline::RenderFrame(const RenderPipelineFrameContext& context, VideoIOOutputFrame& outputFrame)
|
||||
{
|
||||
const VideoIOState& state = context.videoState;
|
||||
@@ -62,9 +69,9 @@ void OpenGLRenderPipeline::PackOutputFor10Bit(const VideoIOState& state)
|
||||
glBindVertexArray(mRenderer.FullscreenVertexArray());
|
||||
glUseProgram(mRenderer.OutputPackProgram());
|
||||
|
||||
const GLint outputResolutionLocation = glGetUniformLocation(mRenderer.OutputPackProgram(), "uOutputVideoResolution");
|
||||
const GLint activeWordsLocation = glGetUniformLocation(mRenderer.OutputPackProgram(), "uActiveV210Words");
|
||||
const GLint packFormatLocation = glGetUniformLocation(mRenderer.OutputPackProgram(), "uOutputPackFormat");
|
||||
const GLint outputResolutionLocation = mRenderer.OutputPackResolutionLocation();
|
||||
const GLint activeWordsLocation = mRenderer.OutputPackActiveWordsLocation();
|
||||
const GLint packFormatLocation = mRenderer.OutputPackFormatLocation();
|
||||
if (outputResolutionLocation >= 0)
|
||||
glUniform2f(outputResolutionLocation, static_cast<float>(state.outputFrameSize.width), static_cast<float>(state.outputFrameSize.height));
|
||||
if (activeWordsLocation >= 0)
|
||||
@@ -78,18 +85,195 @@ void OpenGLRenderPipeline::PackOutputFor10Bit(const VideoIOState& state)
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
}
|
||||
|
||||
void OpenGLRenderPipeline::ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame)
|
||||
bool OpenGLRenderPipeline::EnsureAsyncReadbackBuffers(std::size_t requiredBytes)
|
||||
{
|
||||
if (requiredBytes == 0)
|
||||
return false;
|
||||
|
||||
if (mAsyncReadbackBytes == requiredBytes && mAsyncReadbackSlots[0].pixelPackBuffer != 0)
|
||||
return true;
|
||||
|
||||
ResetAsyncReadbackState();
|
||||
mAsyncReadbackBytes = requiredBytes;
|
||||
for (AsyncReadbackSlot& slot : mAsyncReadbackSlots)
|
||||
{
|
||||
glGenBuffers(1, &slot.pixelPackBuffer);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(requiredBytes), nullptr, GL_STREAM_READ);
|
||||
slot.sizeBytes = requiredBytes;
|
||||
slot.inFlight = false;
|
||||
}
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
mAsyncReadbackWriteIndex = 0;
|
||||
mAsyncReadbackReadIndex = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
void OpenGLRenderPipeline::ResetAsyncReadbackState()
|
||||
{
|
||||
FlushAsyncReadbackPipeline();
|
||||
for (AsyncReadbackSlot& slot : mAsyncReadbackSlots)
|
||||
slot.sizeBytes = 0;
|
||||
|
||||
if (mAsyncReadbackSlots[0].pixelPackBuffer != 0)
|
||||
{
|
||||
for (AsyncReadbackSlot& slot : mAsyncReadbackSlots)
|
||||
{
|
||||
if (slot.pixelPackBuffer != 0)
|
||||
{
|
||||
glDeleteBuffers(1, &slot.pixelPackBuffer);
|
||||
slot.pixelPackBuffer = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mAsyncReadbackWriteIndex = 0;
|
||||
mAsyncReadbackReadIndex = 0;
|
||||
mAsyncReadbackBytes = 0;
|
||||
}
|
||||
|
||||
void OpenGLRenderPipeline::FlushAsyncReadbackPipeline()
|
||||
{
|
||||
for (AsyncReadbackSlot& slot : mAsyncReadbackSlots)
|
||||
{
|
||||
if (slot.fence != nullptr)
|
||||
{
|
||||
glDeleteSync(slot.fence);
|
||||
slot.fence = nullptr;
|
||||
}
|
||||
slot.inFlight = false;
|
||||
}
|
||||
|
||||
mAsyncReadbackWriteIndex = 0;
|
||||
mAsyncReadbackReadIndex = 0;
|
||||
}
|
||||
|
||||
void OpenGLRenderPipeline::QueueAsyncReadback(const VideoIOState& state)
|
||||
{
|
||||
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
|
||||
const std::size_t requiredBytes = static_cast<std::size_t>(state.outputFrameRowBytes) * state.outputFrameSize.height;
|
||||
const GLenum format = usePackedOutput ? GL_RGBA : GL_BGRA;
|
||||
const GLenum type = usePackedOutput ? GL_UNSIGNED_BYTE : GL_UNSIGNED_INT_8_8_8_8_REV;
|
||||
const GLuint framebuffer = usePackedOutput ? mRenderer.OutputPackFramebuffer() : mRenderer.OutputFramebuffer();
|
||||
const GLsizei readWidth = static_cast<GLsizei>(usePackedOutput ? state.outputPackTextureWidth : state.outputFrameSize.width);
|
||||
const GLsizei readHeight = static_cast<GLsizei>(state.outputFrameSize.height);
|
||||
|
||||
if (requiredBytes == 0)
|
||||
return;
|
||||
|
||||
if (mAsyncReadbackBytes != requiredBytes
|
||||
|| mAsyncReadbackFormat != format
|
||||
|| mAsyncReadbackType != type
|
||||
|| mAsyncReadbackFramebuffer != framebuffer)
|
||||
{
|
||||
mAsyncReadbackFormat = format;
|
||||
mAsyncReadbackType = type;
|
||||
mAsyncReadbackFramebuffer = framebuffer;
|
||||
if (!EnsureAsyncReadbackBuffers(requiredBytes))
|
||||
return;
|
||||
}
|
||||
|
||||
AsyncReadbackSlot& slot = mAsyncReadbackSlots[mAsyncReadbackWriteIndex];
|
||||
if (slot.fence != nullptr)
|
||||
{
|
||||
glDeleteSync(slot.fence);
|
||||
slot.fence = nullptr;
|
||||
}
|
||||
|
||||
glPixelStorei(GL_PACK_ALIGNMENT, 4);
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
if (state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10)
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, framebuffer);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER, static_cast<GLsizeiptr>(requiredBytes), nullptr, GL_STREAM_READ);
|
||||
glReadPixels(0, 0, readWidth, readHeight, format, type, nullptr);
|
||||
slot.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
slot.inFlight = slot.fence != nullptr;
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
|
||||
mAsyncReadbackWriteIndex = (mAsyncReadbackWriteIndex + 1) % mAsyncReadbackSlots.size();
|
||||
}
|
||||
|
||||
bool OpenGLRenderPipeline::TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds)
|
||||
{
|
||||
if (mAsyncReadbackBytes == 0 || outputFrame.bytes == nullptr)
|
||||
return false;
|
||||
|
||||
AsyncReadbackSlot& slot = mAsyncReadbackSlots[mAsyncReadbackReadIndex];
|
||||
if (!slot.inFlight || slot.fence == nullptr || slot.pixelPackBuffer == 0)
|
||||
return false;
|
||||
|
||||
const GLenum waitFlags = timeoutNanoseconds > 0 ? GL_SYNC_FLUSH_COMMANDS_BIT : 0;
|
||||
const GLenum waitResult = glClientWaitSync(slot.fence, waitFlags, timeoutNanoseconds);
|
||||
if (waitResult != GL_ALREADY_SIGNALED && waitResult != GL_CONDITION_SATISFIED)
|
||||
return false;
|
||||
|
||||
glDeleteSync(slot.fence);
|
||||
slot.fence = nullptr;
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, slot.pixelPackBuffer);
|
||||
void* mappedBytes = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
|
||||
if (mappedBytes == nullptr)
|
||||
{
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
slot.inFlight = false;
|
||||
mAsyncReadbackReadIndex = (mAsyncReadbackReadIndex + 1) % mAsyncReadbackSlots.size();
|
||||
return false;
|
||||
}
|
||||
|
||||
std::memcpy(outputFrame.bytes, mappedBytes, slot.sizeBytes);
|
||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
|
||||
slot.inFlight = false;
|
||||
mAsyncReadbackReadIndex = (mAsyncReadbackReadIndex + 1) % mAsyncReadbackSlots.size();
|
||||
CacheOutputFrame(outputFrame);
|
||||
return true;
|
||||
}
|
||||
|
||||
void OpenGLRenderPipeline::CacheOutputFrame(const VideoIOOutputFrame& outputFrame)
|
||||
{
|
||||
if (outputFrame.bytes == nullptr || outputFrame.height == 0 || outputFrame.rowBytes <= 0)
|
||||
return;
|
||||
|
||||
const std::size_t byteCount = static_cast<std::size_t>(outputFrame.rowBytes) * outputFrame.height;
|
||||
mCachedOutputFrame.resize(byteCount);
|
||||
std::memcpy(mCachedOutputFrame.data(), outputFrame.bytes, byteCount);
|
||||
}
|
||||
|
||||
void OpenGLRenderPipeline::ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes)
|
||||
{
|
||||
const bool usePackedOutput = state.outputPixelFormat == VideoIOPixelFormat::V210 || state.outputPixelFormat == VideoIOPixelFormat::Yuva10;
|
||||
|
||||
glPixelStorei(GL_PACK_ALIGNMENT, 4);
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
if (usePackedOutput)
|
||||
{
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputPackFramebuffer());
|
||||
glReadPixels(0, 0, state.outputPackTextureWidth, state.outputFrameSize.height, GL_RGBA, GL_UNSIGNED_BYTE, outputFrame.bytes);
|
||||
glReadPixels(0, 0, state.outputPackTextureWidth, state.outputFrameSize.height, GL_RGBA, GL_UNSIGNED_BYTE, destinationBytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputFramebuffer());
|
||||
glReadPixels(0, 0, state.outputFrameSize.width, state.outputFrameSize.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, outputFrame.bytes);
|
||||
glReadPixels(0, 0, state.outputFrameSize.width, state.outputFrameSize.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, destinationBytes);
|
||||
}
|
||||
}
|
||||
|
||||
void OpenGLRenderPipeline::ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame)
|
||||
{
|
||||
if (TryConsumeAsyncReadback(outputFrame, 500000))
|
||||
{
|
||||
QueueAsyncReadback(state);
|
||||
return;
|
||||
}
|
||||
|
||||
// If async readback misses the playout deadline, prefer a fresh synchronous
|
||||
// frame over reusing stale cached output, then restart the async pipeline.
|
||||
if (outputFrame.bytes != nullptr)
|
||||
{
|
||||
ReadOutputFrameSynchronously(state, outputFrame.bytes);
|
||||
CacheOutputFrame(outputFrame);
|
||||
}
|
||||
|
||||
FlushAsyncReadbackPipeline();
|
||||
QueueAsyncReadback(state);
|
||||
}
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include "GLExtensions.h"
|
||||
#include "VideoIOTypes.h"
|
||||
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
class OpenGLRenderer;
|
||||
class RuntimeHost;
|
||||
@@ -26,10 +29,26 @@ public:
|
||||
RenderEffectCallback renderEffect,
|
||||
OutputReadyCallback outputReady,
|
||||
PaintCallback paint);
|
||||
~OpenGLRenderPipeline();
|
||||
|
||||
bool RenderFrame(const RenderPipelineFrameContext& context, VideoIOOutputFrame& outputFrame);
|
||||
|
||||
private:
|
||||
struct AsyncReadbackSlot
|
||||
{
|
||||
GLuint pixelPackBuffer = 0;
|
||||
GLsync fence = nullptr;
|
||||
std::size_t sizeBytes = 0;
|
||||
bool inFlight = false;
|
||||
};
|
||||
|
||||
bool EnsureAsyncReadbackBuffers(std::size_t requiredBytes);
|
||||
void ResetAsyncReadbackState();
|
||||
void FlushAsyncReadbackPipeline();
|
||||
void QueueAsyncReadback(const VideoIOState& state);
|
||||
bool TryConsumeAsyncReadback(VideoIOOutputFrame& outputFrame, GLuint64 timeoutNanoseconds);
|
||||
void CacheOutputFrame(const VideoIOOutputFrame& outputFrame);
|
||||
void ReadOutputFrameSynchronously(const VideoIOState& state, void* destinationBytes);
|
||||
void PackOutputFor10Bit(const VideoIOState& state);
|
||||
void ReadOutputFrame(const VideoIOState& state, VideoIOOutputFrame& outputFrame);
|
||||
|
||||
@@ -38,4 +57,12 @@ private:
|
||||
RenderEffectCallback mRenderEffect;
|
||||
OutputReadyCallback mOutputReady;
|
||||
PaintCallback mPaint;
|
||||
std::array<AsyncReadbackSlot, 3> mAsyncReadbackSlots;
|
||||
std::size_t mAsyncReadbackWriteIndex = 0;
|
||||
std::size_t mAsyncReadbackReadIndex = 0;
|
||||
std::size_t mAsyncReadbackBytes = 0;
|
||||
GLenum mAsyncReadbackFormat = GL_BGRA;
|
||||
GLenum mAsyncReadbackType = GL_UNSIGNED_INT_8_8_8_8_REV;
|
||||
GLuint mAsyncReadbackFramebuffer = 0;
|
||||
std::vector<unsigned char> mCachedOutputFrame;
|
||||
};
|
||||
|
||||
@@ -62,6 +62,8 @@ PFNGLGENBUFFERSPROC glGenBuffers;
|
||||
PFNGLDELETEBUFFERSPROC glDeleteBuffers;
|
||||
PFNGLBINDBUFFERPROC glBindBuffer;
|
||||
PFNGLBUFFERDATAPROC glBufferData;
|
||||
PFNGLMAPBUFFERPROC glMapBuffer;
|
||||
PFNGLUNMAPBUFFERPROC glUnmapBuffer;
|
||||
PFNGLBUFFERSUBDATAPROC glBufferSubData;
|
||||
PFNGLBINDBUFFERBASEPROC glBindBufferBase;
|
||||
PFNGLACTIVETEXTUREPROC glActiveTexture;
|
||||
@@ -131,6 +133,8 @@ bool ResolveGLExtensions()
|
||||
glDeleteBuffers = (PFNGLDELETEBUFFERSPROC) wglGetProcAddress("glDeleteBuffers");
|
||||
glBindBuffer = (PFNGLBINDBUFFERPROC) wglGetProcAddress("glBindBuffer");
|
||||
glBufferData = (PFNGLBUFFERDATAPROC) wglGetProcAddress("glBufferData");
|
||||
glMapBuffer = (PFNGLMAPBUFFERPROC) wglGetProcAddress("glMapBuffer");
|
||||
glUnmapBuffer = (PFNGLUNMAPBUFFERPROC) wglGetProcAddress("glUnmapBuffer");
|
||||
glBufferSubData = (PFNGLBUFFERSUBDATAPROC) wglGetProcAddress("glBufferSubData");
|
||||
glBindBufferBase = (PFNGLBINDBUFFERBASEPROC) wglGetProcAddress("glBindBufferBase");
|
||||
glActiveTexture = (PFNGLACTIVETEXTUREPROC) wglGetProcAddress("glActiveTexture");
|
||||
@@ -176,6 +180,8 @@ bool ResolveGLExtensions()
|
||||
&& glDeleteBuffers
|
||||
&& glBindBuffer
|
||||
&& glBufferData
|
||||
&& glMapBuffer
|
||||
&& glUnmapBuffer
|
||||
&& glBufferSubData
|
||||
&& glBindBufferBase
|
||||
&& glActiveTexture
|
||||
|
||||
@@ -89,6 +89,11 @@
|
||||
#define GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD 0x9160
|
||||
#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117
|
||||
#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001
|
||||
#define GL_ALREADY_SIGNALED 0x911A
|
||||
#define GL_TIMEOUT_EXPIRED 0x911B
|
||||
#define GL_CONDITION_SATISFIED 0x911C
|
||||
#define GL_WAIT_FAILED 0x911D
|
||||
#define GL_READ_ONLY 0x88B8
|
||||
|
||||
typedef struct __GLsync *GLsync;
|
||||
typedef unsigned __int64 GLuint64;
|
||||
@@ -100,6 +105,8 @@ typedef void (APIENTRYP PFNGLBINDBUFFERPROC) (GLenum target, GLuint buffer);
|
||||
typedef void (APIENTRYP PFNGLDELETEBUFFERSPROC) (GLsizei n, const GLuint *buffers);
|
||||
typedef void (APIENTRYP PFNGLGENBUFFERSPROC) (GLsizei n, GLuint *buffers);
|
||||
typedef void (APIENTRYP PFNGLBUFFERDATAPROC) (GLenum target, GLsizeiptr size, const GLvoid *data, GLenum usage);
|
||||
typedef GLvoid* (APIENTRYP PFNGLMAPBUFFERPROC) (GLenum target, GLenum access);
|
||||
typedef GLboolean (APIENTRYP PFNGLUNMAPBUFFERPROC) (GLenum target);
|
||||
typedef void (APIENTRYP PFNGLATTACHSHADERPROC) (GLuint program, GLuint shader);
|
||||
typedef void (APIENTRYP PFNGLCOMPILESHADERPROC) (GLuint shader);
|
||||
typedef GLuint (APIENTRYP PFNGLCREATEPROGRAMPROC) (void);
|
||||
@@ -159,6 +166,8 @@ extern PFNGLGENBUFFERSPROC glGenBuffers;
|
||||
extern PFNGLDELETEBUFFERSPROC glDeleteBuffers;
|
||||
extern PFNGLBINDBUFFERPROC glBindBuffer;
|
||||
extern PFNGLBUFFERDATAPROC glBufferData;
|
||||
extern PFNGLMAPBUFFERPROC glMapBuffer;
|
||||
extern PFNGLUNMAPBUFFERPROC glUnmapBuffer;
|
||||
extern PFNGLBUFFERSUBDATAPROC glBufferSubData;
|
||||
extern PFNGLBINDBUFFERBASEPROC glBindBufferBase;
|
||||
extern PFNGLACTIVETEXTUREPROC glActiveTexture;
|
||||
|
||||
@@ -71,6 +71,9 @@ void OpenGLRenderer::SetDecodeShaderProgram(GLuint program, GLuint vertexShader,
|
||||
mDecodeProgram = program;
|
||||
mDecodeVertexShader = vertexShader;
|
||||
mDecodeFragmentShader = fragmentShader;
|
||||
mDecodePackedResolutionLocation = program != 0 ? glGetUniformLocation(program, "uPackedVideoResolution") : -1;
|
||||
mDecodeDecodedResolutionLocation = program != 0 ? glGetUniformLocation(program, "uDecodedVideoResolution") : -1;
|
||||
mDecodeInputPixelFormatLocation = program != 0 ? glGetUniformLocation(program, "uInputPixelFormat") : -1;
|
||||
}
|
||||
|
||||
void OpenGLRenderer::SetOutputPackShaderProgram(GLuint program, GLuint vertexShader, GLuint fragmentShader)
|
||||
@@ -78,6 +81,9 @@ void OpenGLRenderer::SetOutputPackShaderProgram(GLuint program, GLuint vertexSha
|
||||
mOutputPackProgram = program;
|
||||
mOutputPackVertexShader = vertexShader;
|
||||
mOutputPackFragmentShader = fragmentShader;
|
||||
mOutputPackResolutionLocation = program != 0 ? glGetUniformLocation(program, "uOutputVideoResolution") : -1;
|
||||
mOutputPackActiveWordsLocation = program != 0 ? glGetUniformLocation(program, "uActiveV210Words") : -1;
|
||||
mOutputPackFormatLocation = program != 0 ? glGetUniformLocation(program, "uOutputPackFormat") : -1;
|
||||
}
|
||||
|
||||
bool OpenGLRenderer::ReserveTemporaryRenderTargets(std::size_t count, unsigned width, unsigned height, std::string& error)
|
||||
@@ -217,6 +223,9 @@ void OpenGLRenderer::DestroyDecodeShaderProgram()
|
||||
glDeleteProgram(mDecodeProgram);
|
||||
mDecodeProgram = 0;
|
||||
}
|
||||
mDecodePackedResolutionLocation = -1;
|
||||
mDecodeDecodedResolutionLocation = -1;
|
||||
mDecodeInputPixelFormatLocation = -1;
|
||||
|
||||
if (mDecodeFragmentShader != 0)
|
||||
{
|
||||
@@ -238,6 +247,9 @@ void OpenGLRenderer::DestroyOutputPackShaderProgram()
|
||||
glDeleteProgram(mOutputPackProgram);
|
||||
mOutputPackProgram = 0;
|
||||
}
|
||||
mOutputPackResolutionLocation = -1;
|
||||
mOutputPackActiveWordsLocation = -1;
|
||||
mOutputPackFormatLocation = -1;
|
||||
|
||||
if (mOutputPackFragmentShader != 0)
|
||||
{
|
||||
|
||||
@@ -70,6 +70,12 @@ public:
|
||||
GLuint GlobalParamsUBO() const { return mGlobalParamsUBO; }
|
||||
GLuint DecodeProgram() const { return mDecodeProgram; }
|
||||
GLuint OutputPackProgram() const { return mOutputPackProgram; }
|
||||
GLint DecodePackedResolutionLocation() const { return mDecodePackedResolutionLocation; }
|
||||
GLint DecodeDecodedResolutionLocation() const { return mDecodeDecodedResolutionLocation; }
|
||||
GLint DecodeInputPixelFormatLocation() const { return mDecodeInputPixelFormatLocation; }
|
||||
GLint OutputPackResolutionLocation() const { return mOutputPackResolutionLocation; }
|
||||
GLint OutputPackActiveWordsLocation() const { return mOutputPackActiveWordsLocation; }
|
||||
GLint OutputPackFormatLocation() const { return mOutputPackFormatLocation; }
|
||||
GLsizeiptr GlobalParamsUBOSize() const { return mGlobalParamsUBOSize; }
|
||||
void SetGlobalParamsUBOSize(GLsizeiptr size) { mGlobalParamsUBOSize = size; }
|
||||
void ReplaceLayerPrograms(std::vector<LayerProgram>& newPrograms) { mLayerPrograms.swap(newPrograms); }
|
||||
@@ -101,9 +107,15 @@ private:
|
||||
GLuint mDecodeProgram = 0;
|
||||
GLuint mDecodeVertexShader = 0;
|
||||
GLuint mDecodeFragmentShader = 0;
|
||||
GLint mDecodePackedResolutionLocation = -1;
|
||||
GLint mDecodeDecodedResolutionLocation = -1;
|
||||
GLint mDecodeInputPixelFormatLocation = -1;
|
||||
GLuint mOutputPackProgram = 0;
|
||||
GLuint mOutputPackVertexShader = 0;
|
||||
GLuint mOutputPackFragmentShader = 0;
|
||||
GLint mOutputPackResolutionLocation = -1;
|
||||
GLint mOutputPackActiveWordsLocation = -1;
|
||||
GLint mOutputPackFormatLocation = -1;
|
||||
GLsizeiptr mGlobalParamsUBOSize = 0;
|
||||
int mViewWidth = 0;
|
||||
int mViewHeight = 0;
|
||||
|
||||
@@ -12,7 +12,8 @@ GlobalParamsBuffer::GlobalParamsBuffer(OpenGLRenderer& renderer) :
|
||||
|
||||
bool GlobalParamsBuffer::Update(const RuntimeRenderState& state, unsigned availableSourceHistoryLength, unsigned availableTemporalHistoryLength)
|
||||
{
|
||||
std::vector<unsigned char> buffer;
|
||||
std::vector<unsigned char>& buffer = mScratchBuffer;
|
||||
buffer.clear();
|
||||
buffer.reserve(512);
|
||||
|
||||
AppendStd140Float(buffer, static_cast<float>(state.timeSeconds));
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
#include "OpenGLRenderer.h"
|
||||
#include "ShaderTypes.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
class GlobalParamsBuffer
|
||||
{
|
||||
public:
|
||||
@@ -12,4 +14,5 @@ public:
|
||||
|
||||
private:
|
||||
OpenGLRenderer& mRenderer;
|
||||
std::vector<unsigned char> mScratchBuffer;
|
||||
};
|
||||
|
||||
@@ -841,6 +841,8 @@ bool RuntimeHost::PollFileChanges(bool& registryChanged, bool& reloadRequested,
|
||||
}
|
||||
|
||||
reloadRequested = mReloadRequested;
|
||||
if (registryChanged || reloadRequested)
|
||||
MarkRenderStateDirtyLocked();
|
||||
return true;
|
||||
}
|
||||
catch (const std::exception& exception)
|
||||
@@ -884,6 +886,7 @@ bool RuntimeHost::AddLayer(const std::string& shaderId, std::string& error)
|
||||
EnsureLayerDefaultsLocked(layer, shaderIt->second);
|
||||
mPersistentState.layers.push_back(layer);
|
||||
mReloadRequested = true;
|
||||
MarkRenderStateDirtyLocked();
|
||||
return SavePersistentState(error);
|
||||
}
|
||||
|
||||
@@ -900,6 +903,7 @@ bool RuntimeHost::RemoveLayer(const std::string& layerId, std::string& error)
|
||||
|
||||
mPersistentState.layers.erase(it);
|
||||
mReloadRequested = true;
|
||||
MarkRenderStateDirtyLocked();
|
||||
return SavePersistentState(error);
|
||||
}
|
||||
|
||||
@@ -921,6 +925,7 @@ bool RuntimeHost::MoveLayer(const std::string& layerId, int direction, std::stri
|
||||
|
||||
std::swap(mPersistentState.layers[index], mPersistentState.layers[newIndex]);
|
||||
mReloadRequested = true;
|
||||
MarkRenderStateDirtyLocked();
|
||||
return SavePersistentState(error);
|
||||
}
|
||||
|
||||
@@ -949,6 +954,7 @@ bool RuntimeHost::MoveLayerToIndex(const std::string& layerId, std::size_t targe
|
||||
mPersistentState.layers.erase(mPersistentState.layers.begin() + static_cast<std::ptrdiff_t>(sourceIndex));
|
||||
mPersistentState.layers.insert(mPersistentState.layers.begin() + static_cast<std::ptrdiff_t>(targetIndex), movedLayer);
|
||||
mReloadRequested = true;
|
||||
MarkRenderStateDirtyLocked();
|
||||
return SavePersistentState(error);
|
||||
}
|
||||
|
||||
@@ -964,6 +970,7 @@ bool RuntimeHost::SetLayerBypass(const std::string& layerId, bool bypassed, std:
|
||||
|
||||
layer->bypass = bypassed;
|
||||
mReloadRequested = true;
|
||||
MarkRenderStateDirtyLocked();
|
||||
return SavePersistentState(error);
|
||||
}
|
||||
|
||||
@@ -988,6 +995,7 @@ bool RuntimeHost::SetLayerShader(const std::string& layerId, const std::string&
|
||||
layer->parameterValues.clear();
|
||||
EnsureLayerDefaultsLocked(*layer, shaderIt->second);
|
||||
mReloadRequested = true;
|
||||
MarkRenderStateDirtyLocked();
|
||||
return SavePersistentState(error);
|
||||
}
|
||||
|
||||
@@ -1024,6 +1032,7 @@ bool RuntimeHost::UpdateLayerParameter(const std::string& layerId, const std::st
|
||||
const double previousCount = value.numberValues.empty() ? 0.0 : value.numberValues[0];
|
||||
const double triggerTime = std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::steady_clock::now() - mStartTime).count();
|
||||
value.numberValues = { previousCount + 1.0, triggerTime };
|
||||
MarkRenderStateDirtyLocked();
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1032,6 +1041,7 @@ bool RuntimeHost::UpdateLayerParameter(const std::string& layerId, const std::st
|
||||
return false;
|
||||
|
||||
layer->parameterValues[parameterId] = normalized;
|
||||
MarkRenderStateDirtyLocked();
|
||||
return SavePersistentState(error);
|
||||
}
|
||||
|
||||
@@ -1079,6 +1089,7 @@ bool RuntimeHost::UpdateLayerParameterByControlKey(const std::string& layerKey,
|
||||
const double previousCount = value.numberValues.empty() ? 0.0 : value.numberValues[0];
|
||||
const double triggerTime = std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::steady_clock::now() - mStartTime).count();
|
||||
value.numberValues = { previousCount + 1.0, triggerTime };
|
||||
MarkRenderStateDirtyLocked();
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1087,6 +1098,7 @@ bool RuntimeHost::UpdateLayerParameterByControlKey(const std::string& layerKey,
|
||||
return false;
|
||||
|
||||
matchedLayer->parameterValues[parameterIt->id] = normalized;
|
||||
MarkRenderStateDirtyLocked();
|
||||
return SavePersistentState(error);
|
||||
}
|
||||
|
||||
@@ -1110,6 +1122,7 @@ bool RuntimeHost::ResetLayerParameters(const std::string& layerId, std::string&
|
||||
|
||||
layer->parameterValues.clear();
|
||||
EnsureLayerDefaultsLocked(*layer, shaderIt->second);
|
||||
MarkRenderStateDirtyLocked();
|
||||
return SavePersistentState(error);
|
||||
}
|
||||
|
||||
@@ -1169,6 +1182,7 @@ bool RuntimeHost::LoadStackPreset(const std::string& presetName, std::string& er
|
||||
|
||||
mPersistentState.layers = nextLayers;
|
||||
mReloadRequested = true;
|
||||
MarkRenderStateDirtyLocked();
|
||||
return SavePersistentState(error);
|
||||
}
|
||||
|
||||
@@ -1197,10 +1211,21 @@ bool RuntimeHost::TrySetSignalStatus(bool hasSignal, unsigned width, unsigned he
|
||||
|
||||
void RuntimeHost::SetSignalStatusLocked(bool hasSignal, unsigned width, unsigned height, const std::string& modeName)
|
||||
{
|
||||
const bool changed = mHasSignal != hasSignal ||
|
||||
mSignalWidth != width ||
|
||||
mSignalHeight != height ||
|
||||
mSignalModeName != modeName;
|
||||
mHasSignal = hasSignal;
|
||||
mSignalWidth = width;
|
||||
mSignalHeight = height;
|
||||
mSignalModeName = modeName;
|
||||
if (changed)
|
||||
MarkRenderStateDirtyLocked();
|
||||
}
|
||||
|
||||
void RuntimeHost::MarkRenderStateDirtyLocked()
|
||||
{
|
||||
mRenderStateVersion.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
void RuntimeHost::SetDeckLinkOutputStatus(const std::string& modelName, bool supportsInternalKeying, bool supportsExternalKeying,
|
||||
@@ -1456,6 +1481,11 @@ bool RuntimeHost::LoadConfig(std::string& error)
|
||||
const double configuredValue = maxTemporalHistoryFramesValue->asNumber(static_cast<double>(mConfig.maxTemporalHistoryFrames));
|
||||
mConfig.maxTemporalHistoryFrames = configuredValue <= 0.0 ? 0u : static_cast<unsigned>(configuredValue);
|
||||
}
|
||||
if (const JsonValue* previewFpsValue = configJson.find("previewFps"))
|
||||
{
|
||||
const double configuredValue = previewFpsValue->asNumber(static_cast<double>(mConfig.previewFps));
|
||||
mConfig.previewFps = configuredValue <= 0.0 ? 0u : static_cast<unsigned>(configuredValue);
|
||||
}
|
||||
if (const JsonValue* enableExternalKeyingValue = configJson.find("enableExternalKeying"))
|
||||
mConfig.enableExternalKeying = enableExternalKeyingValue->asBoolean(mConfig.enableExternalKeying);
|
||||
if (const JsonValue* videoFormatValue = configJson.find("videoFormat"))
|
||||
@@ -1674,6 +1704,8 @@ bool RuntimeHost::ScanShaderPackages(std::string& error)
|
||||
++it;
|
||||
}
|
||||
|
||||
MarkRenderStateDirtyLocked();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1840,6 +1872,7 @@ JsonValue RuntimeHost::BuildStateValue() const
|
||||
app.set("oscPort", JsonValue(static_cast<double>(mConfig.oscPort)));
|
||||
app.set("autoReload", JsonValue(mAutoReloadEnabled));
|
||||
app.set("maxTemporalHistoryFrames", JsonValue(static_cast<double>(mConfig.maxTemporalHistoryFrames)));
|
||||
app.set("previewFps", JsonValue(static_cast<double>(mConfig.previewFps)));
|
||||
app.set("enableExternalKeying", JsonValue(mConfig.enableExternalKeying));
|
||||
app.set("inputVideoFormat", JsonValue(mConfig.inputVideoFormat));
|
||||
app.set("inputFrameRate", JsonValue(mConfig.inputFrameRate));
|
||||
|
||||
@@ -56,6 +56,7 @@ public:
|
||||
bool TryGetLayerRenderStates(unsigned outputWidth, unsigned outputHeight, std::vector<RuntimeRenderState>& states) const;
|
||||
void RefreshDynamicRenderStateFields(std::vector<RuntimeRenderState>& states) const;
|
||||
std::string BuildStateJson() const;
|
||||
uint64_t GetRenderStateVersion() const { return mRenderStateVersion.load(std::memory_order_relaxed); }
|
||||
|
||||
const std::filesystem::path& GetRepoRoot() const { return mRepoRoot; }
|
||||
const std::filesystem::path& GetUiRoot() const { return mUiRoot; }
|
||||
@@ -64,6 +65,7 @@ public:
|
||||
unsigned short GetServerPort() const { return mServerPort; }
|
||||
unsigned short GetOscPort() const { return mConfig.oscPort; }
|
||||
unsigned GetMaxTemporalHistoryFrames() const { return mConfig.maxTemporalHistoryFrames; }
|
||||
unsigned GetPreviewFps() const { return mConfig.previewFps; }
|
||||
bool ExternalKeyingEnabled() const { return mConfig.enableExternalKeying; }
|
||||
const std::string& GetInputVideoFormat() const { return mConfig.inputVideoFormat; }
|
||||
const std::string& GetInputFrameRate() const { return mConfig.inputFrameRate; }
|
||||
@@ -80,6 +82,7 @@ private:
|
||||
unsigned short oscPort = 9000;
|
||||
bool autoReload = true;
|
||||
unsigned maxTemporalHistoryFrames = 4;
|
||||
unsigned previewFps = 30;
|
||||
bool enableExternalKeying = false;
|
||||
std::string inputVideoFormat = "1080p";
|
||||
std::string inputFrameRate = "59.94";
|
||||
@@ -135,6 +138,7 @@ private:
|
||||
const LayerPersistentState* FindLayerById(const std::string& layerId) const;
|
||||
std::string GenerateLayerId();
|
||||
void SetSignalStatusLocked(bool hasSignal, unsigned width, unsigned height, const std::string& modeName);
|
||||
void MarkRenderStateDirtyLocked();
|
||||
void SetPerformanceStatsLocked(double frameBudgetMilliseconds, double renderMilliseconds);
|
||||
void SetFramePacingStatsLocked(double completionIntervalMilliseconds, double smoothedCompletionIntervalMilliseconds,
|
||||
double maxCompletionIntervalMilliseconds, uint64_t lateFrameCount, uint64_t droppedFrameCount, uint64_t flushedFrameCount);
|
||||
@@ -179,6 +183,7 @@ private:
|
||||
bool mAutoReloadEnabled;
|
||||
std::chrono::steady_clock::time_point mStartTime;
|
||||
std::chrono::steady_clock::time_point mLastScanTime;
|
||||
std::atomic<uint64_t> mFrameCounter;
|
||||
std::atomic<uint64_t> mFrameCounter{ 0 };
|
||||
std::atomic<uint64_t> mRenderStateVersion{ 0 };
|
||||
uint64_t mNextLayerId;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user