16bit processing
All checks were successful
CI / React UI Build (push) Successful in 38s
CI / Native Windows Build And Tests (push) Successful in 1m48s
CI / Windows Release Package (push) Successful in 2m16s

This commit is contained in:
2026-05-08 13:27:41 +10:00
parent fb9122ecdc
commit c9fed70a60
30 changed files with 770 additions and 1129 deletions

View File

@@ -60,6 +60,9 @@
#define GL_DYNAMIC_DRAW 0x88E8
#define GL_UNIFORM_BUFFER 0x8A11
#define GL_RGBA8 0x8058
#define GL_RGBA16F 0x881A
#define GL_TEXTURE0 0x84C0
#define GL_ACTIVE_TEXTURE 0x84E0
#define GL_ARRAY_BUFFER 0x8892
#define GL_PIXEL_PACK_BUFFER 0x88EB
#define GL_PIXEL_UNPACK_BUFFER 0x88EC

View File

@@ -16,6 +16,7 @@ const char* kDecodeFragmentShaderSource =
"layout(binding = 2) uniform sampler2D uPackedVideoInput;\n"
"uniform vec2 uPackedVideoResolution;\n"
"uniform vec2 uDecodedVideoResolution;\n"
"uniform int uInputPixelFormat;\n"
"in vec2 vTexCoord;\n"
"layout(location = 0) out vec4 fragColor;\n"
"vec4 rec709YCbCr2rgba(float Y, float Cb, float Cr, float a)\n"
@@ -25,14 +26,116 @@ const char* kDecodeFragmentShaderSource =
" Cr = (Cr * 256.0 - 16.0) / 224.0 - 0.5;\n"
" return vec4(Y + 1.5748 * Cr, Y - 0.1873 * Cb - 0.4681 * Cr, Y + 1.8556 * Cb, a);\n"
"}\n"
"vec4 rec709YCbCr10_2rgba(float Y, float Cb, float Cr, float a)\n"
"{\n"
" Y = (Y - 64.0) / 876.0;\n"
" Cb = (Cb - 64.0) / 896.0 - 0.5;\n"
" Cr = (Cr - 64.0) / 896.0 - 0.5;\n"
" return vec4(Y + 1.5748 * Cr, Y - 0.1873 * Cb - 0.4681 * Cr, Y + 1.8556 * Cb, a);\n"
"}\n"
"uint loadV210Word(ivec2 coord)\n"
"{\n"
" vec4 b = round(texelFetch(uPackedVideoInput, coord, 0) * 255.0);\n"
" return uint(b.r) | (uint(b.g) << 8) | (uint(b.b) << 16) | (uint(b.a) << 24);\n"
"}\n"
"float v210Component(uint word, int index)\n"
"{\n"
" return float((word >> uint(index * 10)) & 1023u);\n"
"}\n"
"vec4 decodeUyvy8(ivec2 outputCoord, ivec2 packedSize)\n"
"{\n"
" ivec2 packedCoord = ivec2(clamp(outputCoord.x / 2, 0, packedSize.x - 1), clamp(outputCoord.y, 0, packedSize.y - 1));\n"
" vec4 macroPixel = texelFetch(uPackedVideoInput, packedCoord, 0);\n"
" float ySample = (outputCoord.x & 1) != 0 ? macroPixel.a : macroPixel.g;\n"
" return rec709YCbCr2rgba(ySample, macroPixel.b, macroPixel.r, 1.0);\n"
"}\n"
"vec4 decodeV210(ivec2 outputCoord, ivec2 packedSize)\n"
"{\n"
" int group = outputCoord.x / 6;\n"
" int pixel = outputCoord.x - group * 6;\n"
" int wordBase = group * 4;\n"
" ivec2 rowBase = ivec2(wordBase, clamp(outputCoord.y, 0, packedSize.y - 1));\n"
" uint w0 = loadV210Word(ivec2(min(rowBase.x + 0, packedSize.x - 1), rowBase.y));\n"
" uint w1 = loadV210Word(ivec2(min(rowBase.x + 1, packedSize.x - 1), rowBase.y));\n"
" uint w2 = loadV210Word(ivec2(min(rowBase.x + 2, packedSize.x - 1), rowBase.y));\n"
" uint w3 = loadV210Word(ivec2(min(rowBase.x + 3, packedSize.x - 1), rowBase.y));\n"
" float y0 = v210Component(w0, 1);\n"
" float y1 = v210Component(w1, 0);\n"
" float y2 = v210Component(w1, 2);\n"
" float y3 = v210Component(w2, 1);\n"
" float y4 = v210Component(w3, 0);\n"
" float y5 = v210Component(w3, 2);\n"
" float cb0 = v210Component(w0, 0);\n"
" float cr0 = v210Component(w0, 2);\n"
" float cb2 = v210Component(w1, 1);\n"
" float cr2 = v210Component(w2, 0);\n"
" float cb4 = v210Component(w2, 2);\n"
" float cr4 = v210Component(w3, 1);\n"
" float ySample = pixel == 0 ? y0 : pixel == 1 ? y1 : pixel == 2 ? y2 : pixel == 3 ? y3 : pixel == 4 ? y4 : y5;\n"
" float cbSample = pixel < 2 ? cb0 : pixel < 4 ? cb2 : cb4;\n"
" float crSample = pixel < 2 ? cr0 : pixel < 4 ? cr2 : cr4;\n"
" return rec709YCbCr10_2rgba(ySample, cbSample, crSample, 1.0);\n"
"}\n"
"void main()\n"
"{\n"
" vec2 correctedUv = vec2(vTexCoord.x, 1.0 - vTexCoord.y);\n"
" ivec2 decodedSize = ivec2(max(uDecodedVideoResolution, vec2(1.0, 1.0)));\n"
" ivec2 outputCoord = clamp(ivec2(correctedUv * vec2(decodedSize)), ivec2(0, 0), decodedSize - ivec2(1, 1));\n"
" ivec2 packedSize = ivec2(max(uPackedVideoResolution, vec2(1.0, 1.0)));\n"
" ivec2 packedCoord = ivec2(clamp(outputCoord.x / 2, 0, packedSize.x - 1), clamp(outputCoord.y, 0, packedSize.y - 1));\n"
" vec4 macroPixel = texelFetch(uPackedVideoInput, packedCoord, 0);\n"
" float ySample = (outputCoord.x & 1) != 0 ? macroPixel.a : macroPixel.g;\n"
" fragColor = rec709YCbCr2rgba(ySample, macroPixel.b, macroPixel.r, 1.0);\n"
" fragColor = uInputPixelFormat == 1 ? decodeV210(outputCoord, packedSize) : decodeUyvy8(outputCoord, packedSize);\n"
"}\n";
// GLSL 4.30 fragment shader that reads the RGB output texture (uOutputRgb) and
// writes packed 10-bit Y'CbCr words: every fragment emits one 32-bit word split
// across the four colour channels. Each group of 4 output words covers 6 source
// pixels (see main()), which is the layout the uActiveV210Words uniform refers to.
const char* kOutputPackFragmentShaderSource =
"#version 430 core\n"
"layout(binding = 0) uniform sampler2D uOutputRgb;\n"
"uniform vec2 uOutputVideoResolution;\n"
"uniform float uActiveV210Words;\n"
"in vec2 vTexCoord;\n"
"layout(location = 0) out vec4 fragColor;\n"
// rgbAt: fetches one source texel with the coordinate clamped to the texture
// size and the colour clamped to [0, 1].
"vec3 rgbAt(int x, int y)\n"
"{\n"
" ivec2 size = ivec2(max(uOutputVideoResolution, vec2(1.0, 1.0)));\n"
" return clamp(texelFetch(uOutputRgb, ivec2(clamp(x, 0, size.x - 1), clamp(y, 0, size.y - 1)), 0).rgb, vec3(0.0), vec3(1.0));\n"
"}\n"
// rgbToLegalYcbcr10: converts RGB to (Y, Cb, Cr) using the Rec. 709 luma
// weights, scales to 10-bit code values and clamps luma to 64..940 and chroma
// to 64..960.
"vec3 rgbToLegalYcbcr10(vec3 rgb)\n"
"{\n"
" float y = dot(rgb, vec3(0.2126, 0.7152, 0.0722));\n"
" float cb = (rgb.b - y) / 1.8556 + 0.5;\n"
" float cr = (rgb.r - y) / 1.5748 + 0.5;\n"
" return vec3(clamp(round(64.0 + y * 876.0), 64.0, 940.0), clamp(round(64.0 + cb * 896.0), 64.0, 960.0), clamp(round(64.0 + cr * 896.0), 64.0, 960.0));\n"
"}\n"
// makeWord: packs three 10-bit values into bits 0-9, 10-19 and 20-29 of one word.
"uint makeWord(float a, float b, float c)\n"
"{\n"
" return (uint(a) & 1023u) | ((uint(b) & 1023u) << 10) | ((uint(c) & 1023u) << 20);\n"
"}\n"
// wordToBytes: splits the word into four bytes, least significant first
// (r, g, b, a), each divided by 255.0.
// NOTE(review): presumably relies on an 8-bit normalized colour attachment so
// each byte is stored unchanged; confirm against the pack framebuffer format.
"vec4 wordToBytes(uint word)\n"
"{\n"
" return vec4(float(word & 255u), float((word >> 8) & 255u), float((word >> 16) & 255u), float((word >> 24) & 255u)) / 255.0;\n"
"}\n"
"void main()\n"
"{\n"
" ivec2 outCoord = ivec2(gl_FragCoord.xy);\n"
// Words at or beyond uActiveV210Words are written as zero.
" if (float(outCoord.x) >= uActiveV210Words)\n"
" {\n"
" fragColor = vec4(0.0);\n"
" return;\n"
" }\n"
// Four consecutive words form one group that encodes six source pixels.
" int group = outCoord.x / 4;\n"
" int wordIndex = outCoord.x - group * 4;\n"
" int pixelBase = group * 6;\n"
" int y = outCoord.y;\n"
" vec3 c0 = rgbToLegalYcbcr10(rgbAt(pixelBase + 0, y));\n"
" vec3 c1 = rgbToLegalYcbcr10(rgbAt(pixelBase + 1, y));\n"
" vec3 c2 = rgbToLegalYcbcr10(rgbAt(pixelBase + 2, y));\n"
" vec3 c3 = rgbToLegalYcbcr10(rgbAt(pixelBase + 3, y));\n"
" vec3 c4 = rgbToLegalYcbcr10(rgbAt(pixelBase + 4, y));\n"
" vec3 c5 = rgbToLegalYcbcr10(rgbAt(pixelBase + 5, y));\n"
// Chroma is shared per pixel pair: average the two pixels' Cb/Cr and round.
" float cb0 = round((c0.y + c1.y) * 0.5);\n"
" float cr0 = round((c0.z + c1.z) * 0.5);\n"
" float cb2 = round((c2.y + c3.y) * 0.5);\n"
" float cr2 = round((c2.z + c3.z) * 0.5);\n"
" float cb4 = round((c4.y + c5.y) * 0.5);\n"
" float cr4 = round((c4.z + c5.z) * 0.5);\n"
// Word contents by index: 0 = (Cb0, Y0, Cr0), 1 = (Y1, Cb2, Y2),
// 2 = (Cr2, Y3, Cb4), 3 = (Y4, Cr4, Y5).
" uint word = wordIndex == 0 ? makeWord(cb0, c0.x, cr0) : wordIndex == 1 ? makeWord(c1.x, cb2, c2.x) : wordIndex == 2 ? makeWord(cr2, c3.x, cb4) : makeWord(c4.x, cr4, c5.x);\n"
" fragColor = wordToBytes(word);\n"
"}\n";

View File

@@ -2,3 +2,4 @@
extern const char* kFullscreenTriangleVertexShaderSource;
extern const char* kDecodeFragmentShaderSource;
extern const char* kOutputPackFragmentShaderSource;

View File

@@ -1,5 +1,4 @@
#include "DeckLinkDisplayMode.h"
#include "DeckLinkFrameTransfer.h"
#include "DeckLinkSession.h"
#include "OpenGLComposite.h"
#include "GLExtensions.h"
@@ -87,17 +86,14 @@ bool OpenGLComposite::InitDeckLink()
MessageBoxA(NULL, initFailureReason.c_str(), title, MB_OK | MB_ICONERROR);
return false;
}
if (!mDeckLink->SelectPreferredFormats(videoModes, initFailureReason))
goto error;
if (! CheckOpenGLExtensions())
{
initFailureReason = "OpenGL extension checks failed.";
goto error;
}
if (mDeckLink->InputOutputDimensionsDiffer())
{
mRenderer->SetFastTransferAvailable(false);
OutputDebugStringA("Input/output dimensions differ; using regular OpenGL transfer fallback instead of fast transfer.\n");
}
if (! InitOpenGLState())
{
@@ -115,16 +111,6 @@ bool OpenGLComposite::InitDeckLink()
else
resizeWindow(mDeckLink->OutputFrameWidth() / 2, mDeckLink->OutputFrameHeight() / 2);
if (mRenderer->FastTransferAvailable())
{
// Initialize fast video frame transfers
if (! VideoFrameTransfer::initialize(mDeckLink->InputFrameWidth(), mDeckLink->InputFrameHeight(), mRenderer->CaptureTexture(), mRenderer->OutputTexture()))
{
MessageBox(NULL, _T("Cannot initialize video transfers."), _T("VideoFrameTransfer error."), MB_OK);
goto error;
}
}
if (!mDeckLink->ConfigureInput(this, hGLDC, hGLRC, videoModes.input, initFailureReason))
{
goto error;
@@ -222,6 +208,11 @@ bool OpenGLComposite::InitOpenGLState()
MessageBoxA(NULL, compilerErrorMessage, "OpenGL decode shader failed to load or compile", MB_OK);
return false;
}
if (!mShaderPrograms->CompileOutputPackShader(sizeof(compilerErrorMessage), compilerErrorMessage))
{
MessageBoxA(NULL, compilerErrorMessage, "OpenGL output pack shader failed to load or compile", MB_OK);
return false;
}
if (!mShaderPrograms->CompileLayerPrograms(mDeckLink->InputFrameWidth(), mDeckLink->InputFrameHeight(), sizeof(compilerErrorMessage), compilerErrorMessage))
{
@@ -233,7 +224,14 @@ bool OpenGLComposite::InitOpenGLState()
mShaderPrograms->ResetTemporalHistoryState();
std::string rendererError;
if (!mRenderer->InitializeResources(mDeckLink->InputFrameWidth(), mDeckLink->InputFrameHeight(), mDeckLink->OutputFrameWidth(), mDeckLink->OutputFrameHeight(), rendererError))
if (!mRenderer->InitializeResources(
mDeckLink->InputFrameWidth(),
mDeckLink->InputFrameHeight(),
mDeckLink->CaptureTextureWidth(),
mDeckLink->OutputFrameWidth(),
mDeckLink->OutputFrameHeight(),
mDeckLink->OutputPackTextureWidth(),
rendererError))
{
MessageBoxA(NULL, rendererError.c_str(), "OpenGL initialization error.", MB_OK);
return false;
@@ -315,6 +313,8 @@ void OpenGLComposite::renderEffect()
layerStates,
mDeckLink->InputFrameWidth(),
mDeckLink->InputFrameHeight(),
mDeckLink->CaptureTextureWidth(),
mDeckLink->InputPixelFormat(),
historyCap,
[this](const RuntimeRenderState& state, LayerProgram::TextBinding& textBinding, std::string& error) {
return mShaderPrograms->UpdateTextBindingTexture(state, textBinding, error);
@@ -392,11 +392,6 @@ void OpenGLComposite::resetTemporalHistoryState()
bool OpenGLComposite::CheckOpenGLExtensions()
{
mRenderer->SetFastTransferAvailable(VideoFrameTransfer::checkFastMemoryTransferAvailable());
if (!mRenderer->FastTransferAvailable())
OutputDebugStringA("Fast memory transfer extension not available, using regular OpenGL transfer fallback instead\n");
return true;
}

View File

@@ -1,12 +1,9 @@
#include "OpenGLDeckLinkBridge.h"
#include "DeckLinkFrameTransfer.h"
#include "DeckLinkSession.h"
#include "OpenGLRenderer.h"
#include "RuntimeHost.h"
#include "VideoFrameTransfer.h"
#include <atlbase.h>
#include <chrono>
#include <gl/gl.h>
@@ -87,29 +84,20 @@ void OpenGLDeckLinkBridge::VideoFrameArrived(IDeckLinkVideoInputFrame* inputFram
wglMakeCurrent(mHdc, mHglrc); // make OpenGL context current in this thread
if (mRenderer.FastTransferAvailable())
{
CComQIPtr<PinnedMemoryAllocator, &IID_PinnedMemoryAllocator> allocator(inputFrameBuffer);
if (!allocator || !allocator->transferFrame(videoPixels, mRenderer.CaptureTexture()))
OutputDebugStringA("Capture: transferFrame() failed\n");
glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, mRenderer.TextureUploadBuffer());
glBufferData(GL_PIXEL_UNPACK_BUFFER, textureSize, videoPixels, GL_DYNAMIC_DRAW);
glBindTexture(GL_TEXTURE_2D, mRenderer.CaptureTexture());
allocator->waitForTransferComplete(videoPixels);
}
// NULL for last arg indicates use current GL_PIXEL_UNPACK_BUFFER target as texture data.
if (mDeckLink.InputPixelFormat() == VideoIOPixelFormat::V210)
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mDeckLink.CaptureTextureWidth(), mDeckLink.InputFrameHeight(), GL_RGBA, GL_UNSIGNED_BYTE, NULL);
else
{
// Use a straightforward texture buffer
glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, mRenderer.UnpinnedTextureBuffer());
glBufferData(GL_PIXEL_UNPACK_BUFFER, textureSize, videoPixels, GL_DYNAMIC_DRAW);
glBindTexture(GL_TEXTURE_2D, mRenderer.CaptureTexture());
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mDeckLink.CaptureTextureWidth(), mDeckLink.InputFrameHeight(), GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
// NULL for last arg indicates use current GL_PIXEL_UNPACK_BUFFER target as texture data
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mDeckLink.InputFrameWidth() / 2, mDeckLink.InputFrameHeight(), GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
wglMakeCurrent(NULL, NULL);
@@ -135,17 +123,35 @@ void OpenGLDeckLinkBridge::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedF
// Draw the effect output to the off-screen framebuffer.
const auto renderStartTime = std::chrono::steady_clock::now();
if (mRenderer.FastTransferAvailable())
VideoFrameTransfer::beginTextureInUse(VideoFrameTransfer::GPUtoCPU);
glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.CompositeFramebuffer());
mRenderEffect();
glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.CompositeFramebuffer());
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, mRenderer.OutputFramebuffer());
glBlitFramebuffer(0, 0, mDeckLink.InputFrameWidth(), mDeckLink.InputFrameHeight(), 0, 0, mDeckLink.OutputFrameWidth(), mDeckLink.OutputFrameHeight(), GL_COLOR_BUFFER_BIT, GL_LINEAR);
glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.OutputFramebuffer());
if (mDeckLink.OutputIsTenBit())
{
glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.OutputPackFramebuffer());
glViewport(0, 0, mDeckLink.OutputPackTextureWidth(), mDeckLink.OutputFrameHeight());
glDisable(GL_SCISSOR_TEST);
glDisable(GL_BLEND);
glDisable(GL_DEPTH_TEST);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, mRenderer.OutputTexture());
glBindVertexArray(mRenderer.FullscreenVertexArray());
glUseProgram(mRenderer.OutputPackProgram());
const GLint outputResolutionLocation = glGetUniformLocation(mRenderer.OutputPackProgram(), "uOutputVideoResolution");
const GLint activeWordsLocation = glGetUniformLocation(mRenderer.OutputPackProgram(), "uActiveV210Words");
if (outputResolutionLocation >= 0)
glUniform2f(outputResolutionLocation, static_cast<float>(mDeckLink.OutputFrameWidth()), static_cast<float>(mDeckLink.OutputFrameHeight()));
if (activeWordsLocation >= 0)
glUniform1f(activeWordsLocation, static_cast<float>(ActiveV210WordsForWidth(mDeckLink.OutputFrameWidth())));
glDrawArrays(GL_TRIANGLES, 0, 3);
glUseProgram(0);
glBindVertexArray(0);
glBindTexture(GL_TEXTURE_2D, 0);
}
glFlush();
if (mRenderer.FastTransferAvailable())
VideoFrameTransfer::endTextureInUse(VideoFrameTransfer::GPUtoCPU);
const auto renderEndTime = std::chrono::steady_clock::now();
const double frameBudgetMilliseconds = mDeckLink.FrameBudgetMilliseconds();
const double renderMilliseconds = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(renderEndTime - renderStartTime).count();
@@ -171,23 +177,19 @@ void OpenGLDeckLinkBridge::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedF
void* pFrame;
outputVideoFrameBuffer->GetBytes(&pFrame);
if (mRenderer.FastTransferAvailable())
glPixelStorei(GL_PACK_ALIGNMENT, 4);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
if (mDeckLink.OutputIsTenBit())
{
if (!mDeckLink.TransferPlayoutFrame(pFrame, mRenderer.OutputTexture()))
OutputDebugStringA("Playback: transferFrame() failed\n");
// Wait for transfer to system memory to complete
mDeckLink.WaitForPlayoutTransferComplete(pFrame);
mPaint();
glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputPackFramebuffer());
glReadPixels(0, 0, mDeckLink.OutputPackTextureWidth(), mDeckLink.OutputFrameHeight(), GL_RGBA, GL_UNSIGNED_BYTE, pFrame);
}
else
{
glPixelStorei(GL_PACK_ALIGNMENT, 4);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputFramebuffer());
glReadPixels(0, 0, mDeckLink.OutputFrameWidth(), mDeckLink.OutputFrameHeight(), GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, pFrame);
mPaint();
}
mPaint();
outputVideoFrameBuffer->EndAccess(bmdBufferAccessWrite);
outputVideoFrameBuffer->Release();

View File

@@ -1,7 +1,6 @@
#include "OpenGLRenderPass.h"
#include "GlRenderConstants.h"
#include "VideoFrameTransfer.h"
OpenGLRenderPass::OpenGLRenderPass(OpenGLRenderer& renderer) :
mRenderer(renderer)
@@ -13,22 +12,18 @@ void OpenGLRenderPass::Render(
const std::vector<RuntimeRenderState>& layerStates,
unsigned inputFrameWidth,
unsigned inputFrameHeight,
unsigned captureTextureWidth,
VideoIOPixelFormat inputPixelFormat,
unsigned historyCap,
const TextBindingUpdater& updateTextBinding,
const GlobalParamsUpdater& updateGlobalParams)
{
if (hasInputSource && mRenderer.FastTransferAvailable())
{
// Signal that the capture texture is about to be sampled into the composite framebuffer.
VideoFrameTransfer::beginTextureInUse(VideoFrameTransfer::CPUtoGPU);
}
glDisable(GL_SCISSOR_TEST);
glDisable(GL_BLEND);
glDisable(GL_DEPTH_TEST);
if (hasInputSource)
{
RenderDecodePass(inputFrameWidth, inputFrameHeight);
RenderDecodePass(inputFrameWidth, inputFrameHeight, captureTextureWidth, inputPixelFormat);
}
else
{
@@ -72,12 +67,9 @@ void OpenGLRenderPass::Render(
}
mRenderer.TemporalHistory().PushSourceFramebuffer(mRenderer.DecodeFramebuffer(), inputFrameWidth, inputFrameHeight);
if (hasInputSource && mRenderer.FastTransferAvailable())
VideoFrameTransfer::endTextureInUse(VideoFrameTransfer::CPUtoGPU);
}
void OpenGLRenderPass::RenderDecodePass(unsigned inputFrameWidth, unsigned inputFrameHeight)
void OpenGLRenderPass::RenderDecodePass(unsigned inputFrameWidth, unsigned inputFrameHeight, unsigned captureTextureWidth, VideoIOPixelFormat inputPixelFormat)
{
glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.DecodeFramebuffer());
glViewport(0, 0, inputFrameWidth, inputFrameHeight);
@@ -89,10 +81,13 @@ void OpenGLRenderPass::RenderDecodePass(unsigned inputFrameWidth, unsigned input
const GLint packedResolutionLocation = glGetUniformLocation(mRenderer.DecodeProgram(), "uPackedVideoResolution");
const GLint decodedResolutionLocation = glGetUniformLocation(mRenderer.DecodeProgram(), "uDecodedVideoResolution");
const GLint inputPixelFormatLocation = glGetUniformLocation(mRenderer.DecodeProgram(), "uInputPixelFormat");
if (packedResolutionLocation >= 0)
glUniform2f(packedResolutionLocation, static_cast<float>(inputFrameWidth / 2), static_cast<float>(inputFrameHeight));
glUniform2f(packedResolutionLocation, static_cast<float>(captureTextureWidth), static_cast<float>(inputFrameHeight));
if (decodedResolutionLocation >= 0)
glUniform2f(decodedResolutionLocation, static_cast<float>(inputFrameWidth), static_cast<float>(inputFrameHeight));
if (inputPixelFormatLocation >= 0)
glUniform1i(inputPixelFormatLocation, inputPixelFormat == VideoIOPixelFormat::V210 ? 1 : 0);
glDrawArrays(GL_TRIANGLES, 0, 3);

View File

@@ -2,6 +2,7 @@
#include "OpenGLRenderer.h"
#include "ShaderTypes.h"
#include "VideoIOFormat.h"
#include <functional>
#include <string>
@@ -21,12 +22,14 @@ public:
const std::vector<RuntimeRenderState>& layerStates,
unsigned inputFrameWidth,
unsigned inputFrameHeight,
unsigned captureTextureWidth,
VideoIOPixelFormat inputPixelFormat,
unsigned historyCap,
const TextBindingUpdater& updateTextBinding,
const GlobalParamsUpdater& updateGlobalParams);
private:
void RenderDecodePass(unsigned inputFrameWidth, unsigned inputFrameHeight);
void RenderDecodePass(unsigned inputFrameWidth, unsigned inputFrameHeight, unsigned captureTextureWidth, VideoIOPixelFormat inputPixelFormat);
void RenderShaderProgram(
GLuint sourceTexture,
GLuint destinationFrameBuffer,

View File

@@ -4,43 +4,52 @@
namespace
{
void ConfigureFrameTexture(unsigned width, unsigned height)
void ConfigureByteFrameTexture(unsigned width, unsigned height)
{
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
}
void ConfigureDisplayFrameTexture(unsigned width, unsigned height)
{
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, width, height, 0, GL_RGBA, GL_FLOAT, NULL);
}
}
bool OpenGLRenderer::InitializeResources(unsigned inputFrameWidth, unsigned inputFrameHeight, unsigned outputFrameWidth, unsigned outputFrameHeight, std::string& error)
bool OpenGLRenderer::InitializeResources(unsigned inputFrameWidth, unsigned inputFrameHeight, unsigned captureTextureWidth, unsigned outputFrameWidth, unsigned outputFrameHeight, unsigned outputPackTextureWidth, std::string& error)
{
glClearColor(0.0f, 0.0f, 0.0f, 0.5f);
glDisable(GL_DEPTH_TEST);
if (!mFastTransferExtensionAvailable)
glGenBuffers(1, &mUnpinnedTextureBuffer);
glGenBuffers(1, &mTextureUploadBuffer);
glGenTextures(1, &mCaptureTexture);
glBindTexture(GL_TEXTURE_2D, mCaptureTexture);
ConfigureFrameTexture(inputFrameWidth / 2, inputFrameHeight);
ConfigureByteFrameTexture(captureTextureWidth, inputFrameHeight);
glBindTexture(GL_TEXTURE_2D, 0);
glGenTextures(1, &mDecodedTexture);
glBindTexture(GL_TEXTURE_2D, mDecodedTexture);
ConfigureFrameTexture(inputFrameWidth, inputFrameHeight);
ConfigureDisplayFrameTexture(inputFrameWidth, inputFrameHeight);
glBindTexture(GL_TEXTURE_2D, 0);
glGenTextures(1, &mLayerTempTexture);
glBindTexture(GL_TEXTURE_2D, mLayerTempTexture);
ConfigureFrameTexture(inputFrameWidth, inputFrameHeight);
ConfigureDisplayFrameTexture(inputFrameWidth, inputFrameHeight);
glBindTexture(GL_TEXTURE_2D, 0);
glGenFramebuffers(1, &mDecodeFrameBuf);
glGenFramebuffers(1, &mLayerTempFrameBuf);
glGenFramebuffers(1, &mIdFrameBuf);
glGenFramebuffers(1, &mOutputFrameBuf);
glGenFramebuffers(1, &mOutputPackFrameBuf);
glGenRenderbuffers(1, &mIdColorBuf);
glGenRenderbuffers(1, &mIdDepthBuf);
glGenVertexArrays(1, &mFullscreenVAO);
@@ -65,7 +74,7 @@ bool OpenGLRenderer::InitializeResources(unsigned inputFrameWidth, unsigned inpu
glBindFramebuffer(GL_FRAMEBUFFER, mIdFrameBuf);
glGenTextures(1, &mFBOTexture);
glBindTexture(GL_TEXTURE_2D, mFBOTexture);
ConfigureFrameTexture(inputFrameWidth, inputFrameHeight);
ConfigureDisplayFrameTexture(inputFrameWidth, inputFrameHeight);
glBindRenderbuffer(GL_RENDERBUFFER, mIdDepthBuf);
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24, inputFrameWidth, inputFrameHeight);
@@ -79,7 +88,7 @@ bool OpenGLRenderer::InitializeResources(unsigned inputFrameWidth, unsigned inpu
glGenTextures(1, &mOutputTexture);
glBindTexture(GL_TEXTURE_2D, mOutputTexture);
ConfigureFrameTexture(outputFrameWidth, outputFrameHeight);
ConfigureDisplayFrameTexture(outputFrameWidth, outputFrameHeight);
glBindFramebuffer(GL_FRAMEBUFFER, mOutputFrameBuf);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mOutputTexture, 0);
@@ -89,6 +98,18 @@ bool OpenGLRenderer::InitializeResources(unsigned inputFrameWidth, unsigned inpu
return false;
}
glGenTextures(1, &mOutputPackTexture);
glBindTexture(GL_TEXTURE_2D, mOutputPackTexture);
ConfigureByteFrameTexture(outputPackTextureWidth, outputFrameHeight);
glBindFramebuffer(GL_FRAMEBUFFER, mOutputPackFrameBuf);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mOutputPackTexture, 0);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE)
{
error = "Cannot initialize output pack framebuffer.";
return false;
}
glBindTexture(GL_TEXTURE_2D, 0);
glBindRenderbuffer(GL_RENDERBUFFER, 0);
glBindFramebuffer(GL_FRAMEBUFFER, 0);
@@ -109,6 +130,13 @@ void OpenGLRenderer::SetDecodeShaderProgram(GLuint program, GLuint vertexShader,
mDecodeFragmentShader = fragmentShader;
}
// Stores the GL object names of the output pack program and its two shader
// stages. Previously stored handles are overwritten, not deleted.
void OpenGLRenderer::SetOutputPackShaderProgram(GLuint program, GLuint vertexShader, GLuint fragmentShader)
{
	mOutputPackVertexShader = vertexShader;
	mOutputPackFragmentShader = fragmentShader;
	mOutputPackProgram = program;
}
void OpenGLRenderer::ResizeView(int width, int height)
{
mViewWidth = width;
@@ -166,6 +194,8 @@ void OpenGLRenderer::DestroyResources()
glDeleteFramebuffers(1, &mIdFrameBuf);
if (mOutputFrameBuf != 0)
glDeleteFramebuffers(1, &mOutputFrameBuf);
if (mOutputPackFrameBuf != 0)
glDeleteFramebuffers(1, &mOutputPackFrameBuf);
if (mIdColorBuf != 0)
glDeleteRenderbuffers(1, &mIdColorBuf);
if (mIdDepthBuf != 0)
@@ -180,8 +210,10 @@ void OpenGLRenderer::DestroyResources()
glDeleteTextures(1, &mFBOTexture);
if (mOutputTexture != 0)
glDeleteTextures(1, &mOutputTexture);
if (mUnpinnedTextureBuffer != 0)
glDeleteBuffers(1, &mUnpinnedTextureBuffer);
if (mOutputPackTexture != 0)
glDeleteTextures(1, &mOutputPackTexture);
if (mTextureUploadBuffer != 0)
glDeleteBuffers(1, &mTextureUploadBuffer);
mFullscreenVAO = 0;
mGlobalParamsUBO = 0;
@@ -189,6 +221,7 @@ void OpenGLRenderer::DestroyResources()
mLayerTempFrameBuf = 0;
mIdFrameBuf = 0;
mOutputFrameBuf = 0;
mOutputPackFrameBuf = 0;
mIdColorBuf = 0;
mIdDepthBuf = 0;
mCaptureTexture = 0;
@@ -196,12 +229,14 @@ void OpenGLRenderer::DestroyResources()
mLayerTempTexture = 0;
mFBOTexture = 0;
mOutputTexture = 0;
mUnpinnedTextureBuffer = 0;
mOutputPackTexture = 0;
mTextureUploadBuffer = 0;
mGlobalParamsUBOSize = 0;
mTemporalHistory.DestroyResources();
DestroyLayerPrograms();
DestroyDecodeShaderProgram();
DestroyOutputPackShaderProgram();
}
void OpenGLRenderer::DestroySingleLayerProgram(LayerProgram& layerProgram)
@@ -272,3 +307,24 @@ void OpenGLRenderer::DestroyDecodeShaderProgram()
mDecodeVertexShader = 0;
}
}
void OpenGLRenderer::DestroyOutputPackShaderProgram()
{
if (mOutputPackProgram != 0)
{
glDeleteProgram(mOutputPackProgram);
mOutputPackProgram = 0;
}
if (mOutputPackFragmentShader != 0)
{
glDeleteShader(mOutputPackFragmentShader);
mOutputPackFragmentShader = 0;
}
if (mOutputPackVertexShader != 0)
{
glDeleteShader(mOutputPackVertexShader);
mOutputPackVertexShader = 0;
}
}

View File

@@ -44,21 +44,22 @@ public:
std::vector<TextBinding> textBindings;
};
bool FastTransferAvailable() const { return mFastTransferExtensionAvailable; }
void SetFastTransferAvailable(bool available) { mFastTransferExtensionAvailable = available; }
GLuint CaptureTexture() const { return mCaptureTexture; }
GLuint DecodedTexture() const { return mDecodedTexture; }
GLuint LayerTempTexture() const { return mLayerTempTexture; }
GLuint CompositeTexture() const { return mFBOTexture; }
GLuint OutputTexture() const { return mOutputTexture; }
GLuint UnpinnedTextureBuffer() const { return mUnpinnedTextureBuffer; }
GLuint OutputPackTexture() const { return mOutputPackTexture; }
GLuint TextureUploadBuffer() const { return mTextureUploadBuffer; }
GLuint DecodeFramebuffer() const { return mDecodeFrameBuf; }
GLuint LayerTempFramebuffer() const { return mLayerTempFrameBuf; }
GLuint CompositeFramebuffer() const { return mIdFrameBuf; }
GLuint OutputFramebuffer() const { return mOutputFrameBuf; }
GLuint OutputPackFramebuffer() const { return mOutputPackFrameBuf; }
GLuint FullscreenVertexArray() const { return mFullscreenVAO; }
GLuint GlobalParamsUBO() const { return mGlobalParamsUBO; }
GLuint DecodeProgram() const { return mDecodeProgram; }
GLuint OutputPackProgram() const { return mOutputPackProgram; }
GLsizeiptr GlobalParamsUBOSize() const { return mGlobalParamsUBOSize; }
void SetGlobalParamsUBOSize(GLsizeiptr size) { mGlobalParamsUBOSize = size; }
void ReplaceLayerPrograms(std::vector<LayerProgram>& newPrograms) { mLayerPrograms.swap(newPrograms); }
@@ -67,26 +68,29 @@ public:
TemporalHistoryBuffers& TemporalHistory() { return mTemporalHistory; }
const TemporalHistoryBuffers& TemporalHistory() const { return mTemporalHistory; }
void SetDecodeShaderProgram(GLuint program, GLuint vertexShader, GLuint fragmentShader);
bool InitializeResources(unsigned inputFrameWidth, unsigned inputFrameHeight, unsigned outputFrameWidth, unsigned outputFrameHeight, std::string& error);
void SetOutputPackShaderProgram(GLuint program, GLuint vertexShader, GLuint fragmentShader);
bool InitializeResources(unsigned inputFrameWidth, unsigned inputFrameHeight, unsigned captureTextureWidth, unsigned outputFrameWidth, unsigned outputFrameHeight, unsigned outputPackTextureWidth, std::string& error);
void ResizeView(int width, int height);
void PresentToWindow(HDC hdc, unsigned outputFrameWidth, unsigned outputFrameHeight);
void DestroyResources();
void DestroySingleLayerProgram(LayerProgram& layerProgram);
void DestroyLayerPrograms();
void DestroyDecodeShaderProgram();
void DestroyOutputPackShaderProgram();
private:
bool mFastTransferExtensionAvailable = false;
GLuint mCaptureTexture = 0;
GLuint mDecodedTexture = 0;
GLuint mLayerTempTexture = 0;
GLuint mFBOTexture = 0;
GLuint mOutputTexture = 0;
GLuint mUnpinnedTextureBuffer = 0;
GLuint mOutputPackTexture = 0;
GLuint mTextureUploadBuffer = 0;
GLuint mDecodeFrameBuf = 0;
GLuint mLayerTempFrameBuf = 0;
GLuint mIdFrameBuf = 0;
GLuint mOutputFrameBuf = 0;
GLuint mOutputPackFrameBuf = 0;
GLuint mIdColorBuf = 0;
GLuint mIdDepthBuf = 0;
GLuint mFullscreenVAO = 0;
@@ -94,6 +98,9 @@ private:
GLuint mDecodeProgram = 0;
GLuint mDecodeVertexShader = 0;
GLuint mDecodeFragmentShader = 0;
GLuint mOutputPackProgram = 0;
GLuint mOutputPackVertexShader = 0;
GLuint mOutputPackFragmentShader = 0;
GLsizeiptr mGlobalParamsUBOSize = 0;
int mViewWidth = 0;
int mViewHeight = 0;

View File

@@ -115,6 +115,11 @@ bool OpenGLShaderPrograms::CompileDecodeShader(int errorMessageSize, char* error
return mCompiler.CompileDecodeShader(errorMessageSize, errorMessage);
}
bool OpenGLShaderPrograms::CompileOutputPackShader(int errorMessageSize, char* errorMessage)
{
return mCompiler.CompileOutputPackShader(errorMessageSize, errorMessage);
}
void OpenGLShaderPrograms::DestroySingleLayerProgram(LayerProgram& layerProgram)
{
mRenderer.DestroySingleLayerProgram(layerProgram);

View File

@@ -20,6 +20,7 @@ public:
bool CompileLayerPrograms(unsigned inputFrameWidth, unsigned inputFrameHeight, int errorMessageSize, char* errorMessage);
bool CommitPreparedLayerPrograms(const PreparedShaderBuild& preparedBuild, unsigned inputFrameWidth, unsigned inputFrameHeight, int errorMessageSize, char* errorMessage);
bool CompileDecodeShader(int errorMessageSize, char* errorMessage);
bool CompileOutputPackShader(int errorMessageSize, char* errorMessage);
void DestroyLayerPrograms();
void DestroySingleLayerProgram(LayerProgram& layerProgram);
void DestroyDecodeShaderProgram();

View File

@@ -192,3 +192,53 @@ bool ShaderProgramCompiler::CompileDecodeShader(int errorMessageSize, char* erro
mRenderer.SetDecodeShaderProgram(newProgram.release(), newVertexShader.release(), newFragmentShader.release());
return true;
}
bool ShaderProgramCompiler::CompileOutputPackShader(int errorMessageSize, char* errorMessage)
{
GLsizei errorBufferSize = 0;
GLint compileResult = GL_FALSE;
GLint linkResult = GL_FALSE;
const char* vertexSource = kFullscreenTriangleVertexShaderSource;
const char* fragmentSource = kOutputPackFragmentShaderSource;
ScopedGlShader newVertexShader(glCreateShader(GL_VERTEX_SHADER));
glShaderSource(newVertexShader.get(), 1, (const GLchar**)&vertexSource, NULL);
glCompileShader(newVertexShader.get());
glGetShaderiv(newVertexShader.get(), GL_COMPILE_STATUS, &compileResult);
if (compileResult == GL_FALSE)
{
glGetShaderInfoLog(newVertexShader.get(), errorMessageSize, &errorBufferSize, errorMessage);
return false;
}
ScopedGlShader newFragmentShader(glCreateShader(GL_FRAGMENT_SHADER));
glShaderSource(newFragmentShader.get(), 1, (const GLchar**)&fragmentSource, NULL);
glCompileShader(newFragmentShader.get());
glGetShaderiv(newFragmentShader.get(), GL_COMPILE_STATUS, &compileResult);
if (compileResult == GL_FALSE)
{
glGetShaderInfoLog(newFragmentShader.get(), errorMessageSize, &errorBufferSize, errorMessage);
return false;
}
ScopedGlProgram newProgram(glCreateProgram());
glAttachShader(newProgram.get(), newVertexShader.get());
glAttachShader(newProgram.get(), newFragmentShader.get());
glLinkProgram(newProgram.get());
glGetProgramiv(newProgram.get(), GL_LINK_STATUS, &linkResult);
if (linkResult == GL_FALSE)
{
glGetProgramInfoLog(newProgram.get(), errorMessageSize, &errorBufferSize, errorMessage);
return false;
}
glUseProgram(newProgram.get());
const GLint outputSamplerLocation = glGetUniformLocation(newProgram.get(), "uOutputRgb");
if (outputSamplerLocation >= 0)
glUniform1i(outputSamplerLocation, 0);
glUseProgram(0);
mRenderer.DestroyOutputPackShaderProgram();
mRenderer.SetOutputPackShaderProgram(newProgram.release(), newVertexShader.release(), newFragmentShader.release());
return true;
}

View File

@@ -16,6 +16,7 @@ public:
bool CompileLayerProgram(const RuntimeRenderState& state, LayerProgram& layerProgram, int errorMessageSize, char* errorMessage);
bool CompilePreparedLayerProgram(const RuntimeRenderState& state, const std::string& fragmentShaderSource, LayerProgram& layerProgram, int errorMessageSize, char* errorMessage);
bool CompileDecodeShader(int errorMessageSize, char* errorMessage);
bool CompileOutputPackShader(int errorMessageSize, char* errorMessage);
private:
OpenGLRenderer& mRenderer;

View File

@@ -105,7 +105,7 @@ bool TemporalHistoryBuffers::CreateRing(Ring& ring, unsigned effectiveLength, Te
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, frameWidth, frameHeight, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, frameWidth, frameHeight, 0, GL_RGBA, GL_FLOAT, NULL);
glGenFramebuffers(1, &slot.framebuffer);
glBindFramebuffer(GL_FRAMEBUFFER, slot.framebuffer);

View File

@@ -1,5 +1,6 @@
#pragma once
#include "GLExtensions.h"
#include "ShaderTypes.h"
#include <windows.h>

View File

@@ -1,377 +0,0 @@
/* -LICENSE-START-
** Copyright (c) 2012 Blackmagic Design
**
** Permission is hereby granted, free of charge, to any person or organization
** obtaining a copy of the software and accompanying documentation (the
** "Software") to use, reproduce, display, distribute, sub-license, execute,
** and transmit the Software, and to prepare derivative works of the Software,
** and to permit third-parties to whom the Software is furnished to do so, in
** accordance with:
**
** (1) if the Software is obtained from Blackmagic Design, the End User License
** Agreement for the Software Development Kit ("EULA") available at
** https://www.blackmagicdesign.com/EULA/DeckLinkSDK; or
**
** (2) if the Software is obtained from any third party, such licensing terms
** as notified by that third party,
**
** and all subject to the following:
**
** (3) the copyright notices in the Software and this entire statement,
** including the above license grant, this restriction and the following
** disclaimer, must be included in all copies of the Software, in whole or in
** part, and all derivative works of the Software, unless such copies or
** derivative works are solely in the form of machine-executable object code
** generated by a source language processor.
**
** (4) THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
** SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
** FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
** ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
** DEALINGS IN THE SOFTWARE.
**
** A copy of the Software is available free of charge at
** https://www.blackmagicdesign.com/desktopvideo_sdk under the EULA.
**
** -LICENSE-END-
*/
#include "VideoFrameTransfer.h"
#include "NativeHandles.h"
// Evaluates a DVP call exactly once. If it does not return DVP_STATUS_OK, the failing
// expression is written to the debugger output and the process is terminated with the
// DVP status as its exit code.
// Wrapped in do { } while (0) so the macro behaves as a single statement and
// "if (x) DVP_CHECK(y); else ..." compiles as expected.
#define DVP_CHECK(cmd) do { \
    DVPStatus hr = (cmd); \
    if (DVP_STATUS_OK != hr) { \
        OutputDebugStringA( #cmd " failed\n" ); \
        ExitProcess(hr); \
    } \
} while (0)
// Definitions of VideoFrameTransfer's static members. Every value starts out as
// false/zero; initialize() assigns the real values once a fast-transfer path is found.
bool VideoFrameTransfer::mInitialized = false;
bool VideoFrameTransfer::mUseDvp = false;
unsigned VideoFrameTransfer::mWidth = 0;
unsigned VideoFrameTransfer::mHeight = 0;
GLuint VideoFrameTransfer::mCaptureTexture = 0;
// NVIDIA specific static members: the DVP handles of the registered capture and
// playback textures, followed by the constants that initialize() obtains from
// dvpGetRequiredConstantsGLCtx().
DVPBufferHandle VideoFrameTransfer::mDvpCaptureTextureHandle = 0;
DVPBufferHandle VideoFrameTransfer::mDvpPlaybackTextureHandle = 0;
uint32_t VideoFrameTransfer::mBufferAddrAlignment = 0;
uint32_t VideoFrameTransfer::mBufferGpuStrideAlignment = 0;
uint32_t VideoFrameTransfer::mSemaphoreAddrAlignment = 0;
uint32_t VideoFrameTransfer::mSemaphoreAllocSize = 0;
uint32_t VideoFrameTransfer::mSemaphorePayloadOffset = 0;
uint32_t VideoFrameTransfer::mSemaphorePayloadSize = 0;
bool VideoFrameTransfer::isNvidiaDvpAvailable()
{
// Look for supported graphics boards
const GLubyte* renderer = glGetString(GL_RENDERER);
if (renderer == NULL)
return false;
bool hasDvp = (strstr((char*)renderer, "Quadro") != NULL);
return hasDvp;
}
bool VideoFrameTransfer::isAMDPinnedMemoryAvailable()
{
// GL_AMD_pinned_memory presence indicates GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD buffer target is supported
const GLubyte* strExt = glGetString(GL_EXTENSIONS);
if (strExt == NULL)
{
// In a core profile context GL_EXTENSIONS is no longer queryable via glGetString().
// Treat this as "extension unavailable" for now; the fast-transfer path is optional.
return false;
}
bool hasAMDPinned = (strstr((char*)strExt, "GL_AMD_pinned_memory") != NULL);
return hasAMDPinned;
}
// Returns true when either fast-transfer mechanism (NVIDIA DVP or AMD pinned memory)
// is available. The AMD check is only performed when the NVIDIA check fails.
bool VideoFrameTransfer::checkFastMemoryTransferAvailable()
{
    if (isNvidiaDvpAvailable())
    {
        return true;
    }
    return isAMDPinnedMemoryAvailable();
}
// One-time setup of the shared (static) transfer state.
//
// width, height:    frame dimensions in pixels, stored for later transfers.
// captureTexture:   GL texture that receives uploaded frames.
// playbackTexture:  GL texture frames are read back from; only used on the DVP path,
//                   where it is registered with DVP alongside captureTexture.
//
// Returns false if already initialised, if neither NVIDIA DVP nor AMD pinned memory is
// available, or if the process working set could not be enlarged. Returns true once the
// selected path is ready. A failing DVP call terminates the process via DVP_CHECK.
bool VideoFrameTransfer::initialize(unsigned width, unsigned height, GLuint captureTexture, GLuint playbackTexture)
{
    if (mInitialized)
    {
        return false;
    }

    const bool nvidiaDvp = isNvidiaDvpAvailable();
    const bool amdPinned = isAMDPinnedMemoryAvailable();
    if (!(nvidiaDvp || amdPinned))
    {
        return false;
    }

    // DVP is preferred whenever it is available.
    mUseDvp = nvidiaDvp;
    mWidth = width;
    mHeight = height;
    mCaptureTexture = captureTexture;

    // BGRA uses 4 bytes per pixel.
    const unsigned frameBytes = mWidth * mHeight * 4;
    if (!initializeMemoryLocking(frameBytes))
    {
        return false;
    }

    if (mUseDvp)
    {
        // Bring up DVP on the application's GL context and fetch its layout constants.
        DVP_CHECK(dvpInitGLContext(DVP_DEVICE_FLAGS_SHARE_APP_CONTEXT));
        DVP_CHECK(dvpGetRequiredConstantsGLCtx(&mBufferAddrAlignment, &mBufferGpuStrideAlignment,
                                               &mSemaphoreAddrAlignment, &mSemaphoreAllocSize,
                                               &mSemaphorePayloadOffset, &mSemaphorePayloadSize));

        // Register both textures with DVP.
        DVP_CHECK(dvpCreateGPUTextureGL(captureTexture, &mDvpCaptureTextureHandle));
        DVP_CHECK(dvpCreateGPUTextureGL(playbackTexture, &mDvpPlaybackTextureHandle));
    }

    mInitialized = true;
    return true;
}
bool VideoFrameTransfer::initializeMemoryLocking(unsigned memSize)
{
// Increase the process working set size to allow pinning of memory.
static SIZE_T dwMin = 0, dwMax = 0;
UniqueHandle processHandle(OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_SET_QUOTA, FALSE, GetCurrentProcessId()));
if (!processHandle.valid())
return false;
// Retrieve the working set size of the process.
if (!dwMin && !GetProcessWorkingSetSize(processHandle.get(), &dwMin, &dwMax))
return false;
// Allow for 80 frames to be locked
BOOL res = SetProcessWorkingSetSize(processHandle.get(), memSize * 80 + dwMin, memSize * 80 + (dwMax-dwMin));
if (!res)
return false;
return true;
}
// SyncInfo sets up a semaphore which is shared between the GPU and CPU and used to
// synchronise access to DVP buffers.
// The constructor allocates the semaphore memory and imports it as a DVP sync object;
// the destructor frees the sync object and then the memory.
struct SyncInfo
{
// semaphoreAllocSize: bytes to allocate for the semaphore.
// semaphoreAddrAlignment: alignment passed to the aligned allocation.
SyncInfo(uint32_t semaphoreAllocSize, uint32_t semaphoreAddrAlignment);
~SyncInfo();
// Aligned semaphore memory handed to DVP; its first word is zeroed on construction.
volatile uint32_t* mSem;
// Counters passed to dvpMemcpyLined by performFrameTransfer(); both start at 0.
volatile uint32_t mReleaseValue;
volatile uint32_t mAcquireValue;
// Handle returned by dvpImportSyncObject for mSem.
DVPSyncObjectHandle mDvpSync;
};
// Allocates aligned semaphore memory, zeroes its first word and the release/acquire
// counters, and imports the semaphore into DVP as a sync object.
//
// semaphoreAllocSize:     bytes to allocate for the semaphore.
// semaphoreAddrAlignment: required address alignment of that allocation.
//
// Throws std::runtime_error if the allocation fails. A failing DVP import terminates
// the process via DVP_CHECK.
SyncInfo::SyncInfo(uint32_t semaphoreAllocSize, uint32_t semaphoreAddrAlignment)
{
    mSem = (uint32_t*)_aligned_malloc(semaphoreAllocSize, semaphoreAddrAlignment);
    // _aligned_malloc reports failure with NULL; writing through it would crash.
    if (mSem == NULL)
        throw std::runtime_error("Error allocating DVP semaphore memory");

    // Initialise
    mSem[0] = 0;
    mReleaseValue = 0;
    mAcquireValue = 0;

    // Setup DVP sync object and import it. The descriptor is value-initialised so any
    // field not assigned below is zero rather than indeterminate when DVP reads it.
    DVPSyncObjectDesc syncObjectDesc = {};
    syncObjectDesc.externalClientWaitFunc = NULL;
    syncObjectDesc.sem = (uint32_t*)mSem;
    DVP_CHECK(dvpImportSyncObject(&syncObjectDesc, &mDvpSync));
}
// Releases the DVP sync object first, then the aligned semaphore memory behind it.
// A failing DVP call terminates the process via DVP_CHECK.
SyncInfo::~SyncInfo()
{
    DVP_CHECK(dvpFreeSyncObject(mDvpSync));

    void* semaphoreMemory = (void*)mSem;
    _aligned_free(semaphoreMemory);
}
// Prepares one system-memory frame buffer for fast transfers in the given direction.
//
// memSize:   size of the buffer in bytes.
// address:   start of the buffer (the AMD path assumes it is aligned to a 4k boundary).
// direction: CPUtoGPU for uploads into the capture texture, GPUtoCPU for read-back.
//
// On the NVIDIA path the memory is pinned with VirtualLock and registered with DVP;
// on the AMD path it is attached to a GL buffer via GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD.
// Throws std::runtime_error when the memory cannot be pinned. A failing DVP call
// terminates the process via DVP_CHECK.
VideoFrameTransfer::VideoFrameTransfer(unsigned long memSize, void* address, Direction direction) :
    mBuffer(address),
    mMemSize(memSize),
    mDirection(direction),
    mExtSync(NULL),
    mGpuSync(NULL),
    mDvpSysMemHandle(0),
    mBufferHandle(0)
{
    if (mUseDvp)
    {
        // Pin the memory
        if (! VirtualLock(mBuffer, mMemSize))
            throw std::runtime_error("Error pinning memory with VirtualLock");

        // Create necessary sysmem and gpu sync objects
        mExtSync = new SyncInfo(mSemaphoreAllocSize, mSemaphoreAddrAlignment);
        mGpuSync = new SyncInfo(mSemaphoreAllocSize, mSemaphoreAddrAlignment);

        // Register system memory buffers with DVP
        DVPSysmemBufferDesc sysMemBuffersDesc;
        sysMemBuffersDesc.width = mWidth;
        sysMemBuffersDesc.height = mHeight;
        sysMemBuffersDesc.stride = mWidth * 4;
        sysMemBuffersDesc.format = DVP_BGRA;
        sysMemBuffersDesc.type = DVP_UNSIGNED_BYTE;
        sysMemBuffersDesc.size = mMemSize;
        sysMemBuffersDesc.bufAddr = mBuffer;
        if (mDirection == CPUtoGPU)
        {
            // A UYVY 4:2:2 frame is transferred to the GPU, rather than RGB 4:4:4, so width is halved
            sysMemBuffersDesc.width /= 2;
            sysMemBuffersDesc.stride /= 2;
        }
        DVP_CHECK(dvpCreateBuffer(&sysMemBuffersDesc, &mDvpSysMemHandle));
        DVP_CHECK(dvpBindToGLCtx(mDvpSysMemHandle));
    }
    else
    {
        // Create an OpenGL buffer handle to use for pinned memory
        GLuint bufferHandle = 0;
        glGenBuffers(1, &bufferHandle);

        // Pin memory by binding buffer to special AMD target.
        glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, bufferHandle);

        // glBufferData() sets up the address so any OpenGL operation on this buffer will use system memory directly
        // (assumes address is aligned to 4k boundary).
        glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, mMemSize, address, GL_STREAM_DRAW);
        const GLenum result = glGetError();

        // Unbind buffer from target on both the success and the failure path.
        glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0);

        if (result != GL_NO_ERROR)
        {
            // The destructor does not run when the constructor throws, so the
            // buffer object must be deleted here or it is leaked.
            glDeleteBuffers(1, &bufferHandle);
            throw std::runtime_error("Error pinning memory with glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, ...)");
        }
        mBufferHandle = bufferHandle;
    }
}
// Undoes what the constructor set up for this buffer on whichever path is active.
VideoFrameTransfer::~VideoFrameTransfer()
{
    if (!mUseDvp)
    {
        // AMD path: the buffer is un-pinned by the GPU when the buffer is deleted.
        glDeleteBuffers(1, &mBufferHandle);
        return;
    }

    // NVIDIA path: detach and destroy the DVP system-memory buffer, drop both sync
    // objects, then unlock the pinned memory.
    DVP_CHECK(dvpUnbindFromGLCtx(mDvpSysMemHandle));
    DVP_CHECK(dvpDestroyBuffer(mDvpSysMemHandle));

    delete mExtSync;
    delete mGpuSync;

    VirtualUnlock(mBuffer, mMemSize);
}
bool VideoFrameTransfer::performFrameTransfer()
{
if (mUseDvp)
{
// NVIDIA DVP transfers
DVPStatus status;
mGpuSync->mReleaseValue++;
dvpBegin();
if (mDirection == CPUtoGPU)
{
// Copy from system memory to GPU texture
dvpMapBufferWaitDVP(mDvpCaptureTextureHandle);
status = dvpMemcpyLined( mDvpSysMemHandle, mExtSync->mDvpSync, mExtSync->mAcquireValue, DVP_TIMEOUT_IGNORED,
mDvpCaptureTextureHandle, mGpuSync->mDvpSync, mGpuSync->mReleaseValue, 0, mHeight);
dvpMapBufferEndDVP(mDvpCaptureTextureHandle);
}
else
{
// Copy from GPU texture to system memory
dvpMapBufferWaitDVP(mDvpPlaybackTextureHandle);
status = dvpMemcpyLined( mDvpPlaybackTextureHandle, mExtSync->mDvpSync, mExtSync->mReleaseValue, DVP_TIMEOUT_IGNORED,
mDvpSysMemHandle, mGpuSync->mDvpSync, mGpuSync->mReleaseValue, 0, mHeight);
dvpMapBufferEndDVP(mDvpPlaybackTextureHandle);
}
dvpEnd();
return (status == DVP_STATUS_OK);
}
else
{
// AMD pinned memory transfers
if (mDirection == CPUtoGPU)
{
glEnable(GL_TEXTURE_2D);
// Use a pinned buffer for the GL_PIXEL_UNPACK_BUFFER target
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, mBufferHandle);
glBindTexture(GL_TEXTURE_2D, mCaptureTexture);
// NULL for last arg indicates use current GL_PIXEL_UNPACK_BUFFER target as texture data
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mWidth/2, mHeight, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
// Ensure pinned texture has been transferred to GPU before we draw with it
GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, 40 * 1000 * 1000); // timeout in nanosec
glDeleteSync(fence);
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glDisable(GL_TEXTURE_2D);
}
else
{
// Use a PIXEL PACK BUFFER to read back pixels
glBindBuffer(GL_PIXEL_PACK_BUFFER, mBufferHandle);
glReadPixels(0, 0, mWidth, mHeight, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
// Ensure GPU has processed all commands in the pipeline up to this point, before memory is read by the CPU
GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, 40 * 1000 * 1000); // timeout in nanosec
glDeleteSync(fence);
}
return (glGetError() == GL_NO_ERROR);
}
}
// On the NVIDIA DVP path, blocks until the buffer has completely transferred between
// the GPU and the CPU buffer. Does nothing on the AMD path.
void VideoFrameTransfer::waitForTransferComplete()
{
    if (mUseDvp)
    {
        dvpBegin();
        dvpSyncObjClientWaitComplete(mGpuSync->mDvpSync, DVP_TIMEOUT_IGNORED);
        dvpEnd();
    }
}
// On the NVIDIA DVP path, calls dvpMapBufferWaitAPI on the texture that belongs to the
// given direction: the capture texture for CPUtoGPU, the playback texture otherwise.
// Does nothing on the AMD path.
void VideoFrameTransfer::beginTextureInUse(Direction direction)
{
    if (mUseDvp)
    {
        const DVPBufferHandle textureHandle =
            (direction == CPUtoGPU) ? mDvpCaptureTextureHandle : mDvpPlaybackTextureHandle;
        dvpMapBufferWaitAPI(textureHandle);
    }
}
// On the NVIDIA DVP path, calls dvpMapBufferEndAPI on the texture that belongs to the
// given direction: the capture texture for CPUtoGPU, the playback texture otherwise.
// Does nothing on the AMD path.
void VideoFrameTransfer::endTextureInUse(Direction direction)
{
    if (mUseDvp)
    {
        const DVPBufferHandle textureHandle =
            (direction == CPUtoGPU) ? mDvpCaptureTextureHandle : mDvpPlaybackTextureHandle;
        dvpMapBufferEndAPI(textureHandle);
    }
}

View File

@@ -1,109 +0,0 @@
/* -LICENSE-START-
** Copyright (c) 2012 Blackmagic Design
**
** Permission is hereby granted, free of charge, to any person or organization
** obtaining a copy of the software and accompanying documentation (the
** "Software") to use, reproduce, display, distribute, sub-license, execute,
** and transmit the Software, and to prepare derivative works of the Software,
** and to permit third-parties to whom the Software is furnished to do so, in
** accordance with:
**
** (1) if the Software is obtained from Blackmagic Design, the End User License
** Agreement for the Software Development Kit ("EULA") available at
** https://www.blackmagicdesign.com/EULA/DeckLinkSDK; or
**
** (2) if the Software is obtained from any third party, such licensing terms
** as notified by that third party,
**
** and all subject to the following:
**
** (3) the copyright notices in the Software and this entire statement,
** including the above license grant, this restriction and the following
** disclaimer, must be included in all copies of the Software, in whole or in
** part, and all derivative works of the Software, unless such copies or
** derivative works are solely in the form of machine-executable object code
** generated by a source language processor.
**
** (4) THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
** SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
** FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
** ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
** DEALINGS IN THE SOFTWARE.
**
** A copy of the Software is available free of charge at
** https://www.blackmagicdesign.com/desktopvideo_sdk under the EULA.
**
** -LICENSE-END-
*/
#ifndef __VIDEO_FRAME_TRANSFER_H__
#define __VIDEO_FRAME_TRANSFER_H__
#include "GLExtensions.h"
#include <stdexcept>
#include <map>
// NVIDIA GPU Direct For Video with OpenGL requires the following two headers.
// See the NVIDIA website to check if your graphics card is supported.
#include <DVPAPI.h>
#include <dvpapi_gl.h>
struct SyncInfo;
// Class for performing efficient frame memory transfers between the CPU and GPU,
// using NVIDIA and AMD extensions.
// Each instance wraps one caller-supplied system-memory frame buffer; the transfer path
// (NVIDIA DVP or AMD pinned memory), frame size and textures are shared static state
// set up once by initialize().
// NOTE(review): the destructor releases per-instance GL/DVP resources but copying is
// not disabled here - confirm instances are never copied.
class VideoFrameTransfer
{
public:
// Direction of a transfer relative to the GPU.
enum Direction
{
CPUtoGPU,
GPUtoCPU
};
// Registers the buffer at `address` (memSize bytes) for transfers in `direction`.
// Throws std::runtime_error when the memory cannot be pinned.
VideoFrameTransfer(unsigned long memSize, void* address, Direction direction);
~VideoFrameTransfer();
// True when either NVIDIA DVP or AMD pinned memory is available.
static bool checkFastMemoryTransferAvailable();
// One-time setup of the shared state; returns false if already initialised or if
// no fast-transfer path could be set up.
static bool initialize(unsigned width, unsigned height, GLuint captureTexture, GLuint playbackTexture);
// Bracket use of the capture (CPUtoGPU) or playback (GPUtoCPU) texture; these only
// do work on the DVP path.
static void beginTextureInUse(Direction direction);
static void endTextureInUse(Direction direction);
// Transfers one frame in this instance's direction; returns true on success.
bool performFrameTransfer();
// On the DVP path, blocks until the transfer has completed; otherwise returns at once.
void waitForTransferComplete();
private:
static bool isNvidiaDvpAvailable();
static bool isAMDPinnedMemoryAvailable();
// Enlarges the process working set so frame buffers of memSize bytes can be pinned.
static bool initializeMemoryLocking(unsigned memSize);
// Caller-supplied frame buffer, its size in bytes, and the transfer direction.
void* mBuffer;
unsigned long mMemSize;
Direction mDirection;
// Shared state assigned by initialize().
static bool mInitialized;
static bool mUseDvp;
static unsigned mWidth;
static unsigned mHeight;
static GLuint mCaptureTexture;
// NVIDIA GPU Direct for Video support
SyncInfo* mExtSync;
SyncInfo* mGpuSync;
DVPBufferHandle mDvpSysMemHandle;
static DVPBufferHandle mDvpCaptureTextureHandle;
static DVPBufferHandle mDvpPlaybackTextureHandle;
// Constants obtained from dvpGetRequiredConstantsGLCtx() in initialize().
static uint32_t mBufferAddrAlignment;
static uint32_t mBufferGpuStrideAlignment;
static uint32_t mSemaphoreAddrAlignment;
static uint32_t mSemaphoreAllocSize;
static uint32_t mSemaphorePayloadOffset;
static uint32_t mSemaphorePayloadSize;
// GPU buffer bound to the target GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD for pinned memory
GLuint mBufferHandle;
};
#endif