From c9fed70a60854cad786e17127d9bbcf6dc5230ea Mon Sep 17 00:00:00 2001 From: Aiden Date: Fri, 8 May 2026 13:27:41 +1000 Subject: [PATCH] 16bit processing --- .gitea/workflows/ci.yml | 32 +- CMakeLists.txt | 37 +- README.md | 26 +- SHADER_CONTRACT.md | 7 + .../LoopThroughWithOpenGLCompositing.vcxproj | 37 +- ...roughWithOpenGLCompositing.vcxproj.filters | 12 +- .../decklink/DeckLinkFrameTransfer.cpp | 305 -------------- .../decklink/DeckLinkFrameTransfer.h | 98 ----- .../decklink/DeckLinkSession.cpp | 165 ++++++-- .../decklink/DeckLinkSession.h | 21 +- .../decklink/VideoIOFormat.cpp | 139 +++++++ .../decklink/VideoIOFormat.h | 38 ++ .../gl/GLExtensions.h | 3 + .../gl/GlShaderSources.cpp | 111 +++++- .../gl/GlShaderSources.h | 1 + .../gl/OpenGLComposite.cpp | 39 +- .../gl/OpenGLDeckLinkBridge.cpp | 76 ++-- .../gl/OpenGLRenderPass.cpp | 21 +- .../gl/OpenGLRenderPass.h | 5 +- .../gl/OpenGLRenderer.cpp | 82 +++- .../gl/OpenGLRenderer.h | 19 +- .../gl/OpenGLShaderPrograms.cpp | 5 + .../gl/OpenGLShaderPrograms.h | 1 + .../gl/ShaderProgramCompiler.cpp | 50 +++ .../gl/ShaderProgramCompiler.h | 1 + .../gl/TemporalHistoryBuffers.cpp | 2 +- .../gl/TextureAssetLoader.h | 1 + .../gl/VideoFrameTransfer.cpp | 377 ------------------ .../gl/VideoFrameTransfer.h | 109 ----- tests/VideoIOFormatTests.cpp | 79 ++++ 30 files changed, 770 insertions(+), 1129 deletions(-) create mode 100644 apps/LoopThroughWithOpenGLCompositing/decklink/VideoIOFormat.cpp create mode 100644 apps/LoopThroughWithOpenGLCompositing/decklink/VideoIOFormat.h delete mode 100644 apps/LoopThroughWithOpenGLCompositing/gl/VideoFrameTransfer.cpp delete mode 100644 apps/LoopThroughWithOpenGLCompositing/gl/VideoFrameTransfer.h create mode 100644 tests/VideoIOFormatTests.cpp diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index a1043ad..5fe35f2 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -31,14 +31,6 @@ jobs: - name: Configure Debug shell: powershell run: | - $gpudirectDir = "${{ 
vars.GPUDIRECT_DIR }}" - if ([string]::IsNullOrWhiteSpace($gpudirectDir)) { - $gpudirectDir = $env:GPUDIRECT_DIR - } - if ([string]::IsNullOrWhiteSpace($gpudirectDir)) { - $gpudirectDir = Join-Path $PWD "3rdParty\Blackmagic DeckLink SDK 16.0\Win\Samples\NVIDIA_GPUDirect" - } - $slangRoot = "${{ vars.SLANG_ROOT }}" if ([string]::IsNullOrWhiteSpace($slangRoot)) { $slangRoot = $env:SLANG_ROOT @@ -48,9 +40,6 @@ jobs: } $requiredFiles = @( - (Join-Path $gpudirectDir "include\DVPAPI.h"), - (Join-Path $gpudirectDir "lib\x64\dvp.lib"), - (Join-Path $gpudirectDir "bin\x64\dvp.dll"), (Join-Path $slangRoot "bin\slangc.exe"), (Join-Path $slangRoot "bin\slang-compiler.dll"), (Join-Path $slangRoot "bin\slang-glslang.dll"), @@ -59,13 +48,12 @@ jobs: $missingFiles = @($requiredFiles | Where-Object { -not (Test-Path -LiteralPath $_) }) if ($missingFiles.Count -gt 0) { - Write-Error "Missing native third-party dependencies. Set Gitea repository variables GPUDIRECT_DIR and SLANG_ROOT, or pre-populate the repo-local 3rdParty folder on the Windows runner. Missing: $($missingFiles -join ', ')" + Write-Error "Missing native third-party dependencies. Set Gitea repository variable SLANG_ROOT, or pre-populate the repo-local 3rdParty folder on the Windows runner. 
Missing: $($missingFiles -join ', ')" exit 1 } - Write-Host "Using GPUDIRECT_DIR=$gpudirectDir" Write-Host "Using SLANG_ROOT=$slangRoot" - cmake --preset vs2022-x64-debug -DGPUDIRECT_DIR="$gpudirectDir" -DSLANG_ROOT="$slangRoot" + cmake --preset vs2022-x64-debug -DSLANG_ROOT="$slangRoot" - name: Build Debug shell: powershell @@ -122,14 +110,6 @@ jobs: - name: Configure Release shell: powershell run: | - $gpudirectDir = "${{ vars.GPUDIRECT_DIR }}" - if ([string]::IsNullOrWhiteSpace($gpudirectDir)) { - $gpudirectDir = $env:GPUDIRECT_DIR - } - if ([string]::IsNullOrWhiteSpace($gpudirectDir)) { - $gpudirectDir = Join-Path $PWD "3rdParty\Blackmagic DeckLink SDK 16.0\Win\Samples\NVIDIA_GPUDirect" - } - $slangRoot = "${{ vars.SLANG_ROOT }}" if ([string]::IsNullOrWhiteSpace($slangRoot)) { $slangRoot = $env:SLANG_ROOT @@ -139,9 +119,6 @@ jobs: } $requiredFiles = @( - (Join-Path $gpudirectDir "include\DVPAPI.h"), - (Join-Path $gpudirectDir "lib\x64\dvp.lib"), - (Join-Path $gpudirectDir "bin\x64\dvp.dll"), (Join-Path $slangRoot "bin\slangc.exe"), (Join-Path $slangRoot "bin\slang-compiler.dll"), (Join-Path $slangRoot "bin\slang-glslang.dll"), @@ -150,13 +127,12 @@ jobs: $missingFiles = @($requiredFiles | Where-Object { -not (Test-Path -LiteralPath $_) }) if ($missingFiles.Count -gt 0) { - Write-Error "Missing native third-party dependencies. Set Gitea repository variables GPUDIRECT_DIR and SLANG_ROOT, or pre-populate the repo-local 3rdParty folder on the Windows runner. Missing: $($missingFiles -join ', ')" + Write-Error "Missing native third-party dependencies. Set Gitea repository variable SLANG_ROOT, or pre-populate the repo-local 3rdParty folder on the Windows runner. 
Missing: $($missingFiles -join ', ')" exit 1 } - Write-Host "Using GPUDIRECT_DIR=$gpudirectDir" Write-Host "Using SLANG_ROOT=$slangRoot" - cmake --preset vs2022-x64-release -DGPUDIRECT_DIR="$gpudirectDir" -DSLANG_ROOT="$slangRoot" + cmake --preset vs2022-x64-release -DSLANG_ROOT="$slangRoot" - name: Build Release shell: powershell diff --git a/CMakeLists.txt b/CMakeLists.txt index 6db8a0b..0321055 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,17 +7,12 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) set(APP_DIR "${CMAKE_CURRENT_SOURCE_DIR}/apps/LoopThroughWithOpenGLCompositing") -set(GPUDIRECT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/3rdParty/Blackmagic DeckLink SDK 16.0/Win/Samples/NVIDIA_GPUDirect" CACHE PATH "Path to the NVIDIA_GPUDirect sample directory from the Blackmagic DeckLink SDK") set(SLANG_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/3rdParty/slang-2026.8-windows-x86_64" CACHE PATH "Path to a Slang binary release containing bin/slangc.exe") if(NOT EXISTS "${APP_DIR}/LoopThroughWithOpenGLCompositing.cpp") message(FATAL_ERROR "Imported app sources were not found under ${APP_DIR}") endif() -if(NOT EXISTS "${GPUDIRECT_DIR}/lib/x64/dvp.lib") - message(FATAL_ERROR "NVIDIA GPUDirect library not found under ${GPUDIRECT_DIR}") -endif() - set(SLANG_RUNTIME_FILES "${SLANG_ROOT}/bin/slangc.exe" "${SLANG_ROOT}/bin/slang-compiler.dll" @@ -51,6 +46,8 @@ set(APP_SOURCES "${APP_DIR}/decklink/DeckLinkFrameTransfer.h" "${APP_DIR}/decklink/DeckLinkSession.cpp" "${APP_DIR}/decklink/DeckLinkSession.h" + "${APP_DIR}/decklink/VideoIOFormat.cpp" + "${APP_DIR}/decklink/VideoIOFormat.h" "${APP_DIR}/gl/GLExtensions.cpp" "${APP_DIR}/gl/GLExtensions.h" "${APP_DIR}/gl/GlobalParamsBuffer.cpp" @@ -83,8 +80,6 @@ set(APP_SOURCES "${APP_DIR}/gl/TextureAssetLoader.h" "${APP_DIR}/gl/TemporalHistoryBuffers.cpp" "${APP_DIR}/gl/TemporalHistoryBuffers.h" - "${APP_DIR}/gl/VideoFrameTransfer.cpp" - "${APP_DIR}/gl/VideoFrameTransfer.h" "${APP_DIR}/LoopThroughWithOpenGLCompositing.cpp" 
"${APP_DIR}/LoopThroughWithOpenGLCompositing.h" "${APP_DIR}/LoopThroughWithOpenGLCompositing.rc" @@ -119,15 +114,9 @@ target_include_directories(LoopThroughWithOpenGLCompositing PRIVATE "${APP_DIR}/platform" "${APP_DIR}/runtime" "${APP_DIR}/shader" - "${GPUDIRECT_DIR}/include" -) - -target_link_directories(LoopThroughWithOpenGLCompositing PRIVATE - "${GPUDIRECT_DIR}/lib/x64" ) target_link_libraries(LoopThroughWithOpenGLCompositing PRIVATE - dvp.lib opengl32 glu32 Ws2_32 @@ -250,20 +239,26 @@ endif() add_test(NAME OscServerTests COMMAND OscServerTests) -add_custom_command(TARGET LoopThroughWithOpenGLCompositing POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different - "${GPUDIRECT_DIR}/bin/x64/dvp.dll" - "$/dvp.dll" +add_executable(VideoIOFormatTests + "${APP_DIR}/decklink/VideoIOFormat.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/tests/VideoIOFormatTests.cpp" ) +target_include_directories(VideoIOFormatTests PRIVATE + "${APP_DIR}" + "${APP_DIR}/decklink" +) + +if(MSVC) + target_compile_options(VideoIOFormatTests PRIVATE /W3) +endif() + +add_test(NAME VideoIOFormatTests COMMAND VideoIOFormatTests) + install(TARGETS LoopThroughWithOpenGLCompositing RUNTIME DESTINATION "." ) -install(FILES "${GPUDIRECT_DIR}/bin/x64/dvp.dll" - DESTINATION "." -) - install(FILES ${SLANG_RUNTIME_FILES} DESTINATION "3rdParty/slang/bin" ) diff --git a/README.md b/README.md index 3519733..e5bdbdb 100644 --- a/README.md +++ b/README.md @@ -20,23 +20,9 @@ The app loads shader packages from `shaders/`, compiles Slang to GLSL at runtime - Windows with Visual Studio 2022 C++ tooling. - CMake 3.24 or newer. - Node.js and npm for the control UI. -- Blackmagic DeckLink SDK 16.0 with the NVIDIA GPUDirect sample files available locally. +- Blackmagic Desktop Video drivers and a DeckLink device. - Slang binary release with `slangc.exe`, `slang-compiler.dll`, `slang-glslang.dll`, and `LICENSE`. -The Blackmagic/GPUDirect SDK should not be committed to this repository. 
`CMakeLists.txt` exposes `GPUDIRECT_DIR` as a cache path so local machines and CI runners can point at their installed SDK location. - -Default expected SDK path: - -```text -3rdParty/Blackmagic DeckLink SDK 16.0/Win/Samples/NVIDIA_GPUDirect -``` - -Override example: - -```powershell -cmake --preset vs2022-x64-debug -DGPUDIRECT_DIR="D:/SDKs/Blackmagic DeckLink SDK 16.0/Win/Samples/NVIDIA_GPUDirect" -``` - Default expected Slang path: ```text @@ -87,7 +73,6 @@ The package folder will contain: ```text dist/VideoShader/ LoopThroughWithOpenGLCompositing.exe - dvp.dll config/ shaders/ 3rdParty/slang/bin/ @@ -227,9 +212,8 @@ The Gitea workflow expects two act runners: - `windows-2022`: builds the native app and runs native tests. - `ubuntu-latest`: installs UI dependencies and runs the Vite build. -The Windows jobs validate native third-party dependencies before configuring CMake. Because `3rdParty/` is ignored, configure these paths on the runner or in Gitea repository variables: +The Windows jobs validate native third-party dependencies before configuring CMake. Because `3rdParty/` is ignored, configure this path on the runner or in a Gitea repository variable: -- `GPUDIRECT_DIR`: path to `Blackmagic DeckLink SDK 16.0/Win/Samples/NVIDIA_GPUDirect`. - `SLANG_ROOT`: path to the Slang binary release folder containing `bin/slangc.exe`. The Windows runner also needs the Visual Studio ATL component installed. In Visual Studio Build Tools 2022, add `C++ ATL for latest v143 build tools (x86 & x64)`, component ID `Microsoft.VisualStudio.Component.VC.ATL`. @@ -237,11 +221,10 @@ The Windows runner also needs the Visual Studio ATL component installed. In Visu Example runner paths: ```text -D:\SDKs\Blackmagic DeckLink SDK 16.0\Win\Samples\NVIDIA_GPUDirect D:\SDKs\slang-2026.8-windows-x86_64 ``` -If neither variable is set, the workflow falls back to the repo-local defaults under `3rdParty/`. 
+If `SLANG_ROOT` is not set, the workflow falls back to the repo-local default under `3rdParty/`. ## Still Todo @@ -261,4 +244,5 @@ If neither variable is set, the workflow falls back to the repo-local defaults u - compute shaders or a small 1x1 or nx1 RGBA16f render target for abritary data store - allow shaders to read other shaders data store based on name? or putput over OSC - Mipmappong needed? -- unwrap a fish eyelens and mirror it and map it to equirectangulr for environmnet map purposes \ No newline at end of file +- unwrap a fish eyelens and mirror it and map it to equirectangulr for environmnet map purposes +- add a random float between 0-1 that changes each time the application loads for shaders to use diff --git a/SHADER_CONTRACT.md b/SHADER_CONTRACT.md index dd4e012..c3d7844 100644 --- a/SHADER_CONTRACT.md +++ b/SHADER_CONTRACT.md @@ -172,6 +172,13 @@ Fields: - `sourceHistoryLength`: number of usable source-history frames currently available. - `temporalHistoryLength`: number of usable temporal frames currently available for this layer. +Color/precision notes: + +- `context.sourceColor`, `sampleVideo()`, and temporal history samples are display-referred Rec.709-like RGB, not linear-light RGB. +- The host prefers 10-bit DeckLink YUV capture and output when the card/mode supports it, with automatic 8-bit fallback. +- Internal decoded, layer, composite, output, and temporal render targets are 16-bit floating point, so gradients and LUT work have more headroom than the packed DeckLink byte formats. +- Do not add extra Rec.709 or linear conversions unless the shader intentionally documents that behavior.
+ ## Helper Functions The wrapper provides: diff --git a/apps/LoopThroughWithOpenGLCompositing/LoopThroughWithOpenGLCompositing.vcxproj b/apps/LoopThroughWithOpenGLCompositing/LoopThroughWithOpenGLCompositing.vcxproj index 1eb62d8..cab98a5 100644 --- a/apps/LoopThroughWithOpenGLCompositing/LoopThroughWithOpenGLCompositing.vcxproj +++ b/apps/LoopThroughWithOpenGLCompositing/LoopThroughWithOpenGLCompositing.vcxproj @@ -89,7 +89,7 @@ Disabled - .;control;decklink;gl;..\..\3rdParty\Blackmagic DeckLink SDK 16.0\Win\Samples\NVIDIA_GPUDirect\include;%(AdditionalIncludeDirectories) + .;control;decklink;gl;%(AdditionalIncludeDirectories) WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions) EnableFastChecks MultiThreadedDebugDLL @@ -99,15 +99,11 @@ stdcpp17 - dvp.lib;opengl32.lib;Glu32.lib;%(AdditionalDependencies) - ..\..\3rdParty\Blackmagic DeckLink SDK 16.0\Win\Samples\NVIDIA_GPUDirect\lib\win32;%(AdditionalLibraryDirectories) + opengl32.lib;Glu32.lib;%(AdditionalDependencies) true Windows MachineX86 - - copy /y "..\..\3rdParty\Blackmagic DeckLink SDK 16.0\Win\Samples\NVIDIA_GPUDirect\bin\$(Platform)\dvp.dll" "$(TargetDir)" - @@ -115,7 +111,7 @@ Disabled - .;control;decklink;gl;..\..\3rdParty\Blackmagic DeckLink SDK 16.0\Win\Samples\NVIDIA_GPUDirect\include;%(AdditionalIncludeDirectories) + .;control;decklink;gl;%(AdditionalIncludeDirectories) WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions) EnableFastChecks MultiThreadedDebugDLL @@ -125,21 +121,17 @@ stdcpp17 - dvp.lib;opengl32.lib;Glu32.lib;%(AdditionalDependencies) - ..\..\3rdParty\Blackmagic DeckLink SDK 16.0\Win\Samples\NVIDIA_GPUDirect\lib\x64;%(AdditionalLibraryDirectories) + opengl32.lib;Glu32.lib;%(AdditionalDependencies) true Windows MachineX64 - - copy /y "..\..\3rdParty\Blackmagic DeckLink SDK 16.0\Win\Samples\NVIDIA_GPUDirect\bin\$(Platform)\dvp.dll" "$(TargetDir)" - MaxSpeed true - .;control;decklink;gl;..\..\3rdParty\Blackmagic DeckLink SDK 
16.0\Win\Samples\NVIDIA_GPUDirect\include;%(AdditionalIncludeDirectories) + .;control;decklink;gl;%(AdditionalIncludeDirectories) WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions) MultiThreadedDLL true @@ -149,18 +141,13 @@ stdcpp17 - dvp.lib;opengl32.lib;Glu32.lib;%(AdditionalDependencies) - ..\..\3rdParty\Blackmagic DeckLink SDK 16.0\Win\Samples\NVIDIA_GPUDirect\lib\win32;%(AdditionalLibraryDirectories) + opengl32.lib;Glu32.lib;%(AdditionalDependencies) true Windows true true MachineX86 - - Copy nececssary DLLs to target directory - copy /y "..\..\3rdParty\Blackmagic DeckLink SDK 16.0\Win\Samples\NVIDIA_GPUDirect\bin\$(Platform)\dvp.dll" "$(TargetDir)" - @@ -169,7 +156,7 @@ MaxSpeed true - .;control;decklink;gl;..\..\3rdParty\Blackmagic DeckLink SDK 16.0\Win\Samples\NVIDIA_GPUDirect\include;%(AdditionalIncludeDirectories) + .;control;decklink;gl;%(AdditionalIncludeDirectories) WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions) MultiThreadedDLL true @@ -179,17 +166,13 @@ stdcpp17 - dvp.lib;opengl32.lib;Glu32.lib;%(AdditionalDependencies) - ..\..\3rdParty\Blackmagic DeckLink SDK 16.0\Win\Samples\NVIDIA_GPUDirect\lib\x64;%(AdditionalLibraryDirectories) + opengl32.lib;Glu32.lib;%(AdditionalDependencies) true Windows true true MachineX64 - - copy /y "..\..\3rdParty\Blackmagic DeckLink SDK 16.0\Win\Samples\NVIDIA_GPUDirect\bin\$(Platform)\dvp.dll" "$(TargetDir)" - @@ -206,10 +189,10 @@ Create Create - + @@ -224,9 +207,9 @@ - + diff --git a/apps/LoopThroughWithOpenGLCompositing/LoopThroughWithOpenGLCompositing.vcxproj.filters b/apps/LoopThroughWithOpenGLCompositing/LoopThroughWithOpenGLCompositing.vcxproj.filters index d8ae771..356889b 100644 --- a/apps/LoopThroughWithOpenGLCompositing/LoopThroughWithOpenGLCompositing.vcxproj.filters +++ b/apps/LoopThroughWithOpenGLCompositing/LoopThroughWithOpenGLCompositing.vcxproj.filters @@ -45,9 +45,6 @@ Source Files - - Source Files - DeckLink API @@ -57,6 +54,9 @@ Source Files + + Source Files + Source Files @@ -95,15 +95,15 @@ 
Header Files - - Header Files - Header Files Header Files + + Header Files + Header Files diff --git a/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkFrameTransfer.cpp b/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkFrameTransfer.cpp index f561c9d..9f81db8 100644 --- a/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkFrameTransfer.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkFrameTransfer.cpp @@ -2,311 +2,6 @@ #include "OpenGLComposite.h" -#include - -DEFINE_GUID(IID_PinnedMemoryAllocator, - 0xddf921a6, 0x279d, 0x4dcd, 0x86, 0x26, 0x75, 0x7f, 0x58, 0xa8, 0xc4, 0x35); - -//////////////////////////////////////////// -// PinnedMemoryAllocator -//////////////////////////////////////////// - -// PinnedMemoryAllocator implements the IDeckLinkVideoBufferAllocator interface to be used instead of the -// built-in buffer allocator. -// -// For this sample application a custom buffer allocator is used to ensure each address -// of buffer memory is aligned on a 4kB boundary required by the OpenGL pinned memory extension. -// If the pinned memory extension is not available, this allocator will still be used and -// demonstrates how to cache buffer allocations for efficiency. -// -// The frame cache delays the releasing of buffers until the cache fills up, thereby avoiding an -// allocate plus pin operation for every frame, followed by an unpin and deallocate on every frame. 
-PinnedMemoryAllocator::PinnedMemoryAllocator(HDC hdc, HGLRC hglrc, VideoFrameTransfer::Direction direction, unsigned cacheSize, unsigned bufferSize) : - mHGLDC(hdc), - mHGLRC(hglrc), - mRefCount(1), - mDirection(direction), - mBufferSize(bufferSize), - mFrameCacheSize(cacheSize) -{ -} - -PinnedMemoryAllocator::~PinnedMemoryAllocator() -{ - // Cleanup any unused buffers that remain in the cache - while (!mFrameCache.empty()) - { - unPinAddress(mFrameCache.back()); - VirtualFree(mFrameCache.back(), 0, MEM_RELEASE); - mFrameCache.pop_back(); - } - - for (auto iter = mFrameTransfer.begin(); iter != mFrameTransfer.end(); ++iter) - delete iter->second; - mFrameTransfer.clear(); -} - -bool PinnedMemoryAllocator::transferFrame(void* address, GLuint gpuTexture) -{ - if (mFrameTransfer.count(address) == 0) - { - // VideoFrameTransfer prepares and pins address - mFrameTransfer[address] = new VideoFrameTransfer(mBufferSize, address, mDirection); - } - - return mFrameTransfer[address]->performFrameTransfer(); -} - -void PinnedMemoryAllocator::waitForTransferComplete(void* address) -{ - if (mFrameTransfer.count(address)) - mFrameTransfer[address]->waitForTransferComplete(); -} - -void PinnedMemoryAllocator::unPinAddress(void* address) -{ - // un-pin address only if it has been pinned for transfer - if (mFrameTransfer.count(address) > 0) - { - wglMakeCurrent(mHGLDC, mHGLRC); - mFrameTransfer.erase(address); - wglMakeCurrent(NULL, NULL); - } -} - -HRESULT STDMETHODCALLTYPE PinnedMemoryAllocator::QueryInterface(REFIID iid, LPVOID* ppv) -{ - if (!ppv) - return E_POINTER; - - if (iid == IID_IUnknown || iid == IID_PinnedMemoryAllocator) - { - *ppv = this; - } - else if (iid == IID_IDeckLinkVideoBufferAllocator) - { - *ppv = static_cast(this); - } - else - { - *ppv = nullptr; - return E_NOINTERFACE; - } - AddRef(); - return S_OK; -} - -ULONG STDMETHODCALLTYPE PinnedMemoryAllocator::AddRef(void) -{ - return ++mRefCount; -} - -ULONG STDMETHODCALLTYPE PinnedMemoryAllocator::Release(void) 
-{ - int newCount = --mRefCount; - if (newCount == 0) - delete this; - return newCount; -} - -HRESULT STDMETHODCALLTYPE PinnedMemoryAllocator::AllocateVideoBuffer(IDeckLinkVideoBuffer** allocatedBuffer) -{ - std::shared_ptr sharedMemBuffer; - - // Manage caching of allocated buffers via shared_ptr deleter. - auto deleter = [this](void* buffer) mutable { - if (mFrameCache.size() < mFrameCacheSize) - { - mFrameCache.push_back(buffer); - } - else - { - // No room left in cache, so un-pin (if it was pinned) and free this buffer - unPinAddress(buffer); - VirtualFree(buffer, 0, MEM_RELEASE); - } - // We AddRef this class once the deleter is used because this class owns the mem - Release(); - }; - - if (mFrameCache.empty()) - { - // Allocate memory on a page boundary - void* memBuffer = VirtualAlloc(NULL, mBufferSize, MEM_COMMIT | MEM_RESERVE | MEM_WRITE_WATCH, PAGE_READWRITE); - if (!memBuffer) - return E_OUTOFMEMORY; - - sharedMemBuffer = std::shared_ptr(memBuffer, deleter); - } - else - { - // Re-use most recently released address - sharedMemBuffer = std::shared_ptr(mFrameCache.back(), deleter); - mFrameCache.pop_back(); - } - - // This class owns the mem so the buffer we return needs to AddRef() this, and Release() in the deleter - AddRef(); - - *allocatedBuffer = new DeckLinkVideoBuffer(sharedMemBuffer, this); - return S_OK; -} - -//////////////////////////////////////////// -// InputAllocatorPool Class -//////////////////////////////////////////// -InputAllocatorPool::InputAllocatorPool(HDC hdc, HGLRC hglrc) -{ - mHDC = hdc; - mHGLRC = hglrc; -} - -HRESULT InputAllocatorPool::QueryInterface(REFIID iid, void** ppv) -{ - if (!ppv) - return E_POINTER; - - if (iid == IID_IUnknown) - { - *ppv = this; - } - else if (iid == IID_IDeckLinkVideoBufferAllocatorProvider) - { - *ppv = static_cast(this); - } - else - { - *ppv = nullptr; - return E_NOINTERFACE; - } - AddRef(); - return S_OK; -} - -ULONG InputAllocatorPool::AddRef(void) -{ - return ++mRefCount; -} - -ULONG 
InputAllocatorPool::Release(void) -{ - int newCount = --mRefCount; - if (newCount == 0) - delete this; - return newCount; -} - -HRESULT InputAllocatorPool::GetVideoBufferAllocator( - /* [in] */ unsigned int bufferSize, - /* [in] */ unsigned int, - /* [in] */ unsigned int, - /* [in] */ unsigned int, - /* [in] */ BMDPixelFormat, - /* [out] */ IDeckLinkVideoBufferAllocator** allocator) -{ - if (!allocator) - return E_POINTER; - - auto existing = mAllocatorBySize.find(bufferSize); - if (existing != mAllocatorBySize.end()) - { - *allocator = &*existing->second; - (*allocator)->AddRef(); - return S_OK; - } - - CComPtr newAllocator; - newAllocator.Attach(new (std::nothrow) PinnedMemoryAllocator(mHDC, mHGLRC, VideoFrameTransfer::CPUtoGPU, 3, bufferSize)); - if (!newAllocator) - return E_OUTOFMEMORY; - - mAllocatorBySize.emplace(std::make_pair(bufferSize, newAllocator)); - *allocator = newAllocator.Detach(); - return S_OK; -} - -//////////////////////////////////////////// -// DeckLink Video Buffer Class -//////////////////////////////////////////// -DeckLinkVideoBuffer::DeckLinkVideoBuffer(std::shared_ptr& buffer, PinnedMemoryAllocator* parent) : - mParentAllocator(parent), - mRefCount(1), - mBuffer(buffer) -{ -} - -HRESULT STDMETHODCALLTYPE DeckLinkVideoBuffer::QueryInterface(REFIID riid, void** ppvObject) -{ - HRESULT result = S_OK; - - if (ppvObject == nullptr) - return E_POINTER; - - if (riid == IID_IUnknown) - { - *ppvObject = this; - AddRef(); - } - else if (riid == IID_IDeckLinkVideoBuffer) - { - *ppvObject = static_cast(this); - AddRef(); - } - else if (riid == IID_PinnedMemoryAllocator) - { - result = mParentAllocator->QueryInterface(riid, ppvObject); - } - else - { - *ppvObject = nullptr; - result = E_NOINTERFACE; - } - - return result; -} - -ULONG STDMETHODCALLTYPE DeckLinkVideoBuffer::AddRef() -{ - return ++mRefCount; -} - -ULONG STDMETHODCALLTYPE DeckLinkVideoBuffer::Release() -{ - int newValue = --mRefCount; - if (newValue == 0) - delete this; - - return 
newValue; -} - -HRESULT STDMETHODCALLTYPE DeckLinkVideoBuffer::GetBytes(void** buffer) -{ - if (buffer == nullptr) - return E_POINTER; - - *buffer = mBuffer.get(); - return S_OK; -} - -HRESULT STDMETHODCALLTYPE DeckLinkVideoBuffer::GetSize(uint64_t* size) -{ - if (size == nullptr) - return E_POINTER; - - *size = mParentAllocator->bufferSize(); - return S_OK; -} - -HRESULT STDMETHODCALLTYPE DeckLinkVideoBuffer::StartAccess(BMDBufferAccessFlags) -{ - return S_OK; -} - -HRESULT STDMETHODCALLTYPE DeckLinkVideoBuffer::EndAccess(BMDBufferAccessFlags) -{ - return S_OK; -} - //////////////////////////////////////////// // DeckLink Capture Delegate Class //////////////////////////////////////////// diff --git a/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkFrameTransfer.h b/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkFrameTransfer.h index ac7ab58..9e12a0f 100644 --- a/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkFrameTransfer.h +++ b/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkFrameTransfer.h @@ -1,111 +1,13 @@ #pragma once #include -#include -#include #include -#include -#include -#include #include "DeckLinkAPI_h.h" -#include "VideoFrameTransfer.h" - -extern "C" const IID IID_PinnedMemoryAllocator; class OpenGLComposite; -//////////////////////////////////////////// -// PinnedMemoryAllocator -//////////////////////////////////////////// -class PinnedMemoryAllocator : public IDeckLinkVideoBufferAllocator -{ -public: - PinnedMemoryAllocator(HDC hdc, HGLRC hglrc, VideoFrameTransfer::Direction direction, unsigned cacheSize, unsigned bufferSize); - virtual ~PinnedMemoryAllocator(); - - bool transferFrame(void* address, GLuint gpuTexture); - void waitForTransferComplete(void* address); - unsigned bufferSize() { return mBufferSize; } - - // IUnknown methods - virtual HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, LPVOID* ppv) override; - virtual ULONG STDMETHODCALLTYPE AddRef(void) override; - virtual ULONG STDMETHODCALLTYPE 
Release(void) override; - - // IDeckLinkVideoBufferAllocator methods - virtual HRESULT STDMETHODCALLTYPE AllocateVideoBuffer(IDeckLinkVideoBuffer** allocatedBuffer) override; - -private: - void unPinAddress(void* address); - -private: - HDC mHGLDC; - HGLRC mHGLRC; - std::atomic mRefCount; - VideoFrameTransfer::Direction mDirection; - std::map mFrameTransfer; - unsigned mBufferSize; - std::vector mFrameCache; - unsigned mFrameCacheSize; -}; - -//////////////////////////////////////////// -// InputAllocatorPool -//////////////////////////////////////////// -class InputAllocatorPool : public IDeckLinkVideoBufferAllocatorProvider -{ -public: - InputAllocatorPool(HDC hdc, HGLRC hglrc); - - // IUnknown interface - ULONG STDMETHODCALLTYPE AddRef() override; - ULONG STDMETHODCALLTYPE Release() override; - HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, void** ppv) override; - - // IDeckLinkVideoBufferAllocatorProvider interface - HRESULT STDMETHODCALLTYPE GetVideoBufferAllocator( - /* [in] */ unsigned int bufferSize, - /* [in] */ unsigned int width, - /* [in] */ unsigned int height, - /* [in] */ unsigned int rowBytes, - /* [in] */ BMDPixelFormat pixelFormat, - /* [out] */ IDeckLinkVideoBufferAllocator** allocator) override; - -private: - std::atomic mRefCount; - std::map > mAllocatorBySize; - HDC mHDC; - HGLRC mHGLRC; -}; - -//////////////////////////////////////////// -// DeckLinkVideoBuffer -//////////////////////////////////////////// -class DeckLinkVideoBuffer : public IDeckLinkVideoBuffer -{ -public: - explicit DeckLinkVideoBuffer(std::shared_ptr& buffer, PinnedMemoryAllocator* parent); - virtual ~DeckLinkVideoBuffer() = default; - - // IUnknown interface - virtual HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject) override; - virtual ULONG STDMETHODCALLTYPE AddRef(void) override; - virtual ULONG STDMETHODCALLTYPE Release(void) override; - - // IDeckLinkVideoBuffer interface - virtual HRESULT STDMETHODCALLTYPE GetBytes(void** buffer) 
override; - virtual HRESULT STDMETHODCALLTYPE GetSize(uint64_t* size) override; - virtual HRESULT STDMETHODCALLTYPE StartAccess(BMDBufferAccessFlags flags) override; - virtual HRESULT STDMETHODCALLTYPE EndAccess(BMDBufferAccessFlags flags) override; - -private: - CComPtr mParentAllocator; - std::atomic mRefCount; - std::shared_ptr mBuffer; -}; - //////////////////////////////////////////// // Capture Delegate Class //////////////////////////////////////////// diff --git a/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkSession.cpp b/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkSession.cpp index 5a7084b..a978f86 100644 --- a/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkSession.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkSession.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include namespace @@ -25,6 +26,42 @@ std::string BstrToUtf8(BSTR value) return std::string(utf8Name.data()); } + +bool InputSupportsFormat(IDeckLinkInput* input, BMDDisplayMode displayMode, BMDPixelFormat pixelFormat) +{ + if (input == nullptr) + return false; + + BOOL supported = FALSE; + BMDDisplayMode actualMode = bmdModeUnknown; + const HRESULT result = input->DoesSupportVideoMode( + bmdVideoConnectionUnspecified, + displayMode, + pixelFormat, + bmdNoVideoInputConversion, + bmdSupportedVideoModeDefault, + &actualMode, + &supported); + return result == S_OK && supported != FALSE; +} + +bool OutputSupportsFormat(IDeckLinkOutput* output, BMDDisplayMode displayMode, BMDPixelFormat pixelFormat) +{ + if (output == nullptr) + return false; + + BOOL supported = FALSE; + BMDDisplayMode actualMode = bmdModeUnknown; + const HRESULT result = output->DoesSupportVideoMode( + bmdVideoConnectionUnspecified, + displayMode, + pixelFormat, + bmdNoVideoOutputConversion, + bmdSupportedVideoModeDefault, + &actualMode, + &supported); + return result == S_OK && supported != FALSE; +} } DeckLinkSession::~DeckLinkSession() @@ -52,8 +89,6 @@ void 
DeckLinkSession::ReleaseResources() playoutDelegate.Release(); outputVideoFrameQueue.clear(); output.Release(); - - playoutAllocator.Release(); } bool DeckLinkSession::DiscoverDevicesAndModes(const VideoFormatSelection& videoModes, std::string& error) @@ -173,11 +208,75 @@ bool DeckLinkSession::DiscoverDevicesAndModes(const VideoFormatSelection& videoM inputDisplayModeName = "No input - black frame"; outputMode->GetFrameRate(&frameDuration, &frameTimescale); + inputFrameRowBytes = inputFrameSize.width * 2u; + outputFrameRowBytes = outputFrameSize.width * 4u; + captureTextureWidth = inputFrameSize.width / 2u; + outputPackTextureWidth = outputFrameSize.width; + + return true; +} + +bool DeckLinkSession::SelectPreferredFormats(const VideoFormatSelection& videoModes, std::string& error) +{ + if (!output) + { + error = "Expected an Output DeckLink device"; + return false; + } + + formatStatusMessage.clear(); + + const bool inputTenBitSupported = input != nullptr && InputSupportsFormat(input, videoModes.input.displayMode, bmdFormat10BitYUV); + inputPixelFormat = input != nullptr ? ChoosePreferredVideoIOFormat(inputTenBitSupported) : VideoIOPixelFormat::Uyvy8; + if (input != nullptr && !inputTenBitSupported) + formatStatusMessage += "DeckLink input does not report 10-bit YUV support for the configured mode; using 8-bit capture. "; + + const bool outputTenBitSupported = OutputSupportsFormat(output, videoModes.output.displayMode, bmdFormat10BitYUV); + outputPixelFormat = ChoosePreferredVideoIOFormat(outputTenBitSupported); + if (!outputTenBitSupported) + formatStatusMessage += "DeckLink output does not report 10-bit YUV support for the configured mode; using 8-bit BGRA output. "; + + int deckLinkOutputRowBytes = 0; + if (output->RowBytesForPixelFormat(OutputIsTenBit() ? 
bmdFormat10BitYUV : bmdFormat8BitBGRA, outputFrameSize.width, &deckLinkOutputRowBytes) != S_OK) + { + error = "DeckLink output setup failed while calculating output row bytes."; + return false; + } + outputFrameRowBytes = static_cast(deckLinkOutputRowBytes); + outputPackTextureWidth = OutputIsTenBit() + ? PackedTextureWidthFromRowBytes(outputFrameRowBytes) + : outputFrameSize.width; + + if (InputIsTenBit()) + { + int deckLinkInputRowBytes = 0; + if (output->RowBytesForPixelFormat(bmdFormat10BitYUV, inputFrameSize.width, &deckLinkInputRowBytes) == S_OK) + inputFrameRowBytes = static_cast(deckLinkInputRowBytes); + else + inputFrameRowBytes = MinimumV210RowBytes(inputFrameSize.width); + } + else + { + inputFrameRowBytes = inputFrameSize.width * 2u; + } + captureTextureWidth = InputIsTenBit() + ? PackedTextureWidthFromRowBytes(inputFrameRowBytes) + : inputFrameSize.width / 2u; + + std::ostringstream status; + status << "DeckLink formats: capture " << (input ? VideoIOPixelFormatName(inputPixelFormat) : "none") + << ", output " << (OutputIsTenBit() ? 
"10-bit YUV v210" : "8-bit BGRA") << "."; + if (!formatStatusMessage.empty()) + status << " " << formatStatusMessage; + formatStatusMessage = status.str(); return true; } bool DeckLinkSession::ConfigureInput(OpenGLComposite* owner, HDC hdc, HGLRC hglrc, const VideoFormat& inputVideoMode, std::string& error) { + (void)hdc; + (void)hglrc; + if (!input) { hasNoInputSource = true; @@ -185,10 +284,26 @@ bool DeckLinkSession::ConfigureInput(OpenGLComposite* owner, HDC hdc, HGLRC hglr return true; } - CComPtr captureAllocator(new (std::nothrow) InputAllocatorPool(hdc, hglrc)); - - if (input->EnableVideoInputWithAllocatorProvider(inputVideoMode.displayMode, bmdFormat8BitYUV, bmdVideoInputFlagDefault, captureAllocator) != S_OK) + const BMDPixelFormat deckLinkInputPixelFormat = DeckLinkPixelFormatForVideoIO(inputPixelFormat); + if (input->EnableVideoInput(inputVideoMode.displayMode, deckLinkInputPixelFormat, bmdVideoInputFlagDefault) != S_OK) { + if (inputPixelFormat == VideoIOPixelFormat::V210) + { + OutputDebugStringA("DeckLink 10-bit input could not be enabled; falling back to 8-bit capture.\n"); + inputPixelFormat = VideoIOPixelFormat::Uyvy8; + inputFrameRowBytes = inputFrameSize.width * 2u; + captureTextureWidth = inputFrameSize.width / 2u; + if (input->EnableVideoInput(inputVideoMode.displayMode, bmdFormat8BitYUV, bmdVideoInputFlagDefault) == S_OK) + { + std::ostringstream status; + status << "DeckLink formats: capture " << VideoIOPixelFormatName(inputPixelFormat) + << ", output " << (OutputIsTenBit() ? "10-bit YUV v210" : "8-bit BGRA") + << ". 
DeckLink 10-bit input enable failed; using 8-bit capture."; + formatStatusMessage = status.str(); + goto input_enabled; + } + } + OutputDebugStringA("DeckLink input could not be enabled; continuing in output-only black-frame mode.\n"); input.Release(); hasNoInputSource = true; @@ -196,6 +311,7 @@ bool DeckLinkSession::ConfigureInput(OpenGLComposite* owner, HDC hdc, HGLRC hglr return true; } +input_enabled: captureDelegate.Attach(new (std::nothrow) CaptureDelegate(owner)); if (captureDelegate == nullptr) { @@ -213,19 +329,8 @@ bool DeckLinkSession::ConfigureInput(OpenGLComposite* owner, HDC hdc, HGLRC hglr bool DeckLinkSession::ConfigureOutput(OpenGLComposite* owner, HDC hdc, HGLRC hglrc, const VideoFormat& outputVideoMode, bool externalKeyingEnabled, std::string& error) { - int outputFrameRowBytes = 0; - if (output->RowBytesForPixelFormat(bmdFormat8BitBGRA, outputFrameSize.width, &outputFrameRowBytes) != S_OK) - { - error = "DeckLink output setup failed while calculating BGRA row bytes."; - return false; - } - - playoutAllocator.Attach(new (std::nothrow) PinnedMemoryAllocator(hdc, hglrc, VideoFrameTransfer::GPUtoCPU, 1, outputFrameRowBytes * outputFrameSize.height)); - if (playoutAllocator == nullptr) - { - error = "DeckLink output setup failed while creating the playout allocator."; - return false; - } + (void)hdc; + (void)hglrc; if (output->EnableVideoOutput(outputVideoMode.displayMode, bmdVideoOutputFlagDefault) != S_OK) { @@ -264,15 +369,9 @@ bool DeckLinkSession::ConfigureOutput(OpenGLComposite* owner, HDC hdc, HGLRC hgl for (int i = 0; i < 10; i++) { CComPtr outputFrame; - CComPtr outputFrameBuffer; - if (playoutAllocator->AllocateVideoBuffer(&outputFrameBuffer) != S_OK) - { - error = "DeckLink output setup failed while allocating an output frame buffer."; - return false; - } - - if (output->CreateVideoFrameWithBuffer(outputFrameSize.width, outputFrameSize.height, outputFrameRowBytes, bmdFormat8BitBGRA, bmdFrameFlagFlipVertical, outputFrameBuffer, 
&outputFrame) != S_OK) + const BMDPixelFormat deckLinkOutputPixelFormat = OutputIsTenBit() ? bmdFormat10BitYUV : bmdFormat8BitBGRA; + if (output->CreateVideoFrame(outputFrameSize.width, outputFrameSize.height, outputFrameRowBytes, deckLinkOutputPixelFormat, bmdFrameFlagFlipVertical, &outputFrame) != S_OK) { error = "DeckLink output setup failed while creating an output video frame."; return false; @@ -294,6 +393,9 @@ bool DeckLinkSession::ConfigureOutput(OpenGLComposite* owner, HDC hdc, HGLRC hgl return false; } + if (!formatStatusMessage.empty()) + statusMessage = statusMessage.empty() ? formatStatusMessage : formatStatusMessage + " " + statusMessage; + return true; } @@ -312,17 +414,6 @@ IDeckLinkMutableVideoFrame* DeckLinkSession::RotateOutputFrame() return outputVideoFrame.p; } -bool DeckLinkSession::TransferPlayoutFrame(void* address, GLuint outputTexture) -{ - return playoutAllocator != nullptr && playoutAllocator->transferFrame(address, outputTexture); -} - -void DeckLinkSession::WaitForPlayoutTransferComplete(void* address) -{ - if (playoutAllocator != nullptr) - playoutAllocator->waitForTransferComplete(address); -} - void DeckLinkSession::AccountForCompletionResult(BMDOutputFrameCompletionResult completionResult) { if (completionResult == bmdOutputFrameDisplayedLate || completionResult == bmdOutputFrameDropped) diff --git a/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkSession.h b/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkSession.h index 657d086..11a1464 100644 --- a/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkSession.h +++ b/apps/LoopThroughWithOpenGLCompositing/decklink/DeckLinkSession.h @@ -3,6 +3,7 @@ #include "DeckLinkAPI_h.h" #include "DeckLinkDisplayMode.h" #include "DeckLinkFrameTransfer.h" +#include "VideoIOFormat.h" #include #include @@ -18,6 +19,7 @@ public: void ReleaseResources(); bool DiscoverDevicesAndModes(const VideoFormatSelection& videoModes, std::string& error); + bool SelectPreferredFormats(const 
VideoFormatSelection& videoModes, std::string& error); bool ConfigureInput(OpenGLComposite* owner, HDC hdc, HGLRC hglrc, const VideoFormat& inputVideoMode, std::string& error); bool ConfigureOutput(OpenGLComposite* owner, HDC hdc, HGLRC hglrc, const VideoFormat& outputVideoMode, bool externalKeyingEnabled, std::string& error); bool Start(); @@ -33,6 +35,15 @@ public: unsigned InputFrameHeight() const { return inputFrameSize.height; } unsigned OutputFrameWidth() const { return outputFrameSize.width; } unsigned OutputFrameHeight() const { return outputFrameSize.height; } + VideoIOPixelFormat InputPixelFormat() const { return inputPixelFormat; } + VideoIOPixelFormat OutputPixelFormat() const { return outputPixelFormat; } + bool InputIsTenBit() const { return VideoIOPixelFormatIsTenBit(inputPixelFormat); } + bool OutputIsTenBit() const { return VideoIOPixelFormatIsTenBit(outputPixelFormat); } + unsigned InputFrameRowBytes() const { return inputFrameRowBytes; } + unsigned OutputFrameRowBytes() const { return outputFrameRowBytes; } + unsigned CaptureTextureWidth() const { return captureTextureWidth; } + unsigned OutputPackTextureWidth() const { return outputPackTextureWidth; } + const std::string& FormatStatusMessage() const { return formatStatusMessage; } const std::string& InputDisplayModeName() const { return inputDisplayModeName; } const std::string& OutputModelName() const { return outputModelName; } bool SupportsInternalKeying() const { return supportsInternalKeying; } @@ -43,8 +54,6 @@ public: void SetStatusMessage(const std::string& message) { statusMessage = message; } double FrameBudgetMilliseconds() const; IDeckLinkMutableVideoFrame* RotateOutputFrame(); - bool TransferPlayoutFrame(void* address, GLuint outputTexture); - void WaitForPlayoutTransferComplete(void* address); void AccountForCompletionResult(BMDOutputFrameCompletionResult completionResult); bool ScheduleOutputFrame(IDeckLinkMutableVideoFrame* outputVideoFrame); @@ -55,12 +64,17 @@ private: CComPtr 
output; CComPtr keyer; std::deque> outputVideoFrameQueue; - CComPtr playoutAllocator; BMDTimeValue frameDuration = 0; BMDTimeScale frameTimescale = 0; unsigned totalPlayoutFrames = 0; FrameSize inputFrameSize; FrameSize outputFrameSize; + VideoIOPixelFormat inputPixelFormat = VideoIOPixelFormat::Uyvy8; + VideoIOPixelFormat outputPixelFormat = VideoIOPixelFormat::Uyvy8; + unsigned inputFrameRowBytes = 0; + unsigned outputFrameRowBytes = 0; + unsigned captureTextureWidth = 0; + unsigned outputPackTextureWidth = 0; std::string inputDisplayModeName = "1080p59.94"; std::string outputDisplayModeName = "1080p59.94"; bool hasNoInputSource = true; @@ -70,4 +84,5 @@ private: bool keyerInterfaceAvailable = false; bool externalKeyingActive = false; std::string statusMessage; + std::string formatStatusMessage; }; diff --git a/apps/LoopThroughWithOpenGLCompositing/decklink/VideoIOFormat.cpp b/apps/LoopThroughWithOpenGLCompositing/decklink/VideoIOFormat.cpp new file mode 100644 index 0000000..fb62df4 --- /dev/null +++ b/apps/LoopThroughWithOpenGLCompositing/decklink/VideoIOFormat.cpp @@ -0,0 +1,139 @@ +#include "VideoIOFormat.h" + +#include +#include + +#ifdef min +#undef min +#endif +#ifdef max +#undef max +#endif + +namespace +{ +uint16_t Clamp10(int value, int minimum, int maximum) +{ + return static_cast(std::max(minimum, std::min(maximum, value))); +} + +uint32_t MakeV210Word(uint16_t a, uint16_t b, uint16_t c) +{ + return (static_cast(a) & 0x3ffu) + | ((static_cast(b) & 0x3ffu) << 10) + | ((static_cast(c) & 0x3ffu) << 20); +} + +void StoreWord(std::array& bytes, std::size_t wordIndex, uint32_t word) +{ + const std::size_t offset = wordIndex * 4; + bytes[offset + 0] = static_cast(word & 0xffu); + bytes[offset + 1] = static_cast((word >> 8) & 0xffu); + bytes[offset + 2] = static_cast((word >> 16) & 0xffu); + bytes[offset + 3] = static_cast((word >> 24) & 0xffu); +} + +uint32_t LoadWord(const std::array& bytes, std::size_t wordIndex) +{ + const std::size_t offset = wordIndex * 
4; + return static_cast(bytes[offset + 0]) + | (static_cast(bytes[offset + 1]) << 8) + | (static_cast(bytes[offset + 2]) << 16) + | (static_cast(bytes[offset + 3]) << 24); +} + +uint16_t Component(uint32_t word, unsigned index) +{ + return static_cast((word >> (index * 10)) & 0x3ffu); +} +} + +const char* VideoIOPixelFormatName(VideoIOPixelFormat format) +{ + return format == VideoIOPixelFormat::V210 ? "10-bit YUV v210" : "8-bit YUV UYVY"; +} + +bool VideoIOPixelFormatIsTenBit(VideoIOPixelFormat format) +{ + return format == VideoIOPixelFormat::V210; +} + +BMDPixelFormat DeckLinkPixelFormatForVideoIO(VideoIOPixelFormat format) +{ + return format == VideoIOPixelFormat::V210 ? bmdFormat10BitYUV : bmdFormat8BitYUV; +} + +VideoIOPixelFormat VideoIOPixelFormatFromDeckLink(BMDPixelFormat format) +{ + return format == bmdFormat10BitYUV ? VideoIOPixelFormat::V210 : VideoIOPixelFormat::Uyvy8; +} + +VideoIOPixelFormat ChoosePreferredVideoIOFormat(bool tenBitSupported) +{ + return tenBitSupported ? 
VideoIOPixelFormat::V210 : VideoIOPixelFormat::Uyvy8; +} + +unsigned PackedTextureWidthFromRowBytes(unsigned rowBytes) +{ + return (rowBytes + 3u) / 4u; +} + +unsigned MinimumV210RowBytes(unsigned frameWidth) +{ + return ((frameWidth + 5u) / 6u) * 16u; +} + +unsigned ActiveV210WordsForWidth(unsigned frameWidth) +{ + return ((frameWidth + 5u) / 6u) * 4u; +} + +V210CodeValues Rec709RgbToLegalV210(float red, float green, float blue) +{ + red = std::max(0.0f, std::min(1.0f, red)); + green = std::max(0.0f, std::min(1.0f, green)); + blue = std::max(0.0f, std::min(1.0f, blue)); + + const float y = 0.2126f * red + 0.7152f * green + 0.0722f * blue; + const float cb = (blue - y) / 1.8556f + 0.5f; + const float cr = (red - y) / 1.5748f + 0.5f; + + V210CodeValues values; + values.y = Clamp10(static_cast(std::lround(64.0f + y * 876.0f)), 64, 940); + values.cb = Clamp10(static_cast(std::lround(64.0f + cb * 896.0f)), 64, 960); + values.cr = Clamp10(static_cast(std::lround(64.0f + cr * 896.0f)), 64, 960); + return values; +} + +std::array PackV210Block(const V210SixPixelBlock& block) +{ + std::array bytes = {}; + StoreWord(bytes, 0, MakeV210Word(block.cb[0], block.y[0], block.cr[0])); + StoreWord(bytes, 1, MakeV210Word(block.y[1], block.cb[1], block.y[2])); + StoreWord(bytes, 2, MakeV210Word(block.cr[1], block.y[3], block.cb[2])); + StoreWord(bytes, 3, MakeV210Word(block.y[4], block.cr[2], block.y[5])); + return bytes; +} + +V210SixPixelBlock UnpackV210Block(const std::array& bytes) +{ + const uint32_t word0 = LoadWord(bytes, 0); + const uint32_t word1 = LoadWord(bytes, 1); + const uint32_t word2 = LoadWord(bytes, 2); + const uint32_t word3 = LoadWord(bytes, 3); + + V210SixPixelBlock block; + block.cb[0] = Component(word0, 0); + block.y[0] = Component(word0, 1); + block.cr[0] = Component(word0, 2); + block.y[1] = Component(word1, 0); + block.cb[1] = Component(word1, 1); + block.y[2] = Component(word1, 2); + block.cr[1] = Component(word2, 0); + block.y[3] = Component(word2, 1); + 
block.cb[2] = Component(word2, 2); + block.y[4] = Component(word3, 0); + block.cr[2] = Component(word3, 1); + block.y[5] = Component(word3, 2); + return block; +} diff --git a/apps/LoopThroughWithOpenGLCompositing/decklink/VideoIOFormat.h b/apps/LoopThroughWithOpenGLCompositing/decklink/VideoIOFormat.h new file mode 100644 index 0000000..fd49f30 --- /dev/null +++ b/apps/LoopThroughWithOpenGLCompositing/decklink/VideoIOFormat.h @@ -0,0 +1,38 @@ +#pragma once + +#include "DeckLinkAPI_h.h" + +#include +#include + +enum class VideoIOPixelFormat +{ + Uyvy8, + V210 +}; + +struct V210CodeValues +{ + uint16_t y = 64; + uint16_t cb = 512; + uint16_t cr = 512; +}; + +struct V210SixPixelBlock +{ + std::array y = {}; + std::array cb = {}; + std::array cr = {}; +}; + +const char* VideoIOPixelFormatName(VideoIOPixelFormat format); +bool VideoIOPixelFormatIsTenBit(VideoIOPixelFormat format); +BMDPixelFormat DeckLinkPixelFormatForVideoIO(VideoIOPixelFormat format); +VideoIOPixelFormat VideoIOPixelFormatFromDeckLink(BMDPixelFormat format); +VideoIOPixelFormat ChoosePreferredVideoIOFormat(bool tenBitSupported); +unsigned PackedTextureWidthFromRowBytes(unsigned rowBytes); +unsigned MinimumV210RowBytes(unsigned frameWidth); +unsigned ActiveV210WordsForWidth(unsigned frameWidth); +V210CodeValues Rec709RgbToLegalV210(float red, float green, float blue); +std::array PackV210Block(const V210SixPixelBlock& block); +V210SixPixelBlock UnpackV210Block(const std::array& bytes); diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/GLExtensions.h b/apps/LoopThroughWithOpenGLCompositing/gl/GLExtensions.h index e89b969..8425bf8 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/GLExtensions.h +++ b/apps/LoopThroughWithOpenGLCompositing/gl/GLExtensions.h @@ -60,6 +60,9 @@ #define GL_DYNAMIC_DRAW 0x88E8 #define GL_UNIFORM_BUFFER 0x8A11 #define GL_RGBA8 0x8058 +#define GL_RGBA16F 0x881A +#define GL_TEXTURE0 0x84C0 +#define GL_ACTIVE_TEXTURE 0x84E0 #define GL_ARRAY_BUFFER 0x8892 #define 
GL_PIXEL_PACK_BUFFER 0x88EB #define GL_PIXEL_UNPACK_BUFFER 0x88EC diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/GlShaderSources.cpp b/apps/LoopThroughWithOpenGLCompositing/gl/GlShaderSources.cpp index 6a30536..5ce4f0e 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/GlShaderSources.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/gl/GlShaderSources.cpp @@ -16,6 +16,7 @@ const char* kDecodeFragmentShaderSource = "layout(binding = 2) uniform sampler2D uPackedVideoInput;\n" "uniform vec2 uPackedVideoResolution;\n" "uniform vec2 uDecodedVideoResolution;\n" + "uniform int uInputPixelFormat;\n" "in vec2 vTexCoord;\n" "layout(location = 0) out vec4 fragColor;\n" "vec4 rec709YCbCr2rgba(float Y, float Cb, float Cr, float a)\n" @@ -25,14 +26,116 @@ const char* kDecodeFragmentShaderSource = " Cr = (Cr * 256.0 - 16.0) / 224.0 - 0.5;\n" " return vec4(Y + 1.5748 * Cr, Y - 0.1873 * Cb - 0.4681 * Cr, Y + 1.8556 * Cb, a);\n" "}\n" + "vec4 rec709YCbCr10_2rgba(float Y, float Cb, float Cr, float a)\n" + "{\n" + " Y = (Y - 64.0) / 876.0;\n" + " Cb = (Cb - 64.0) / 896.0 - 0.5;\n" + " Cr = (Cr - 64.0) / 896.0 - 0.5;\n" + " return vec4(Y + 1.5748 * Cr, Y - 0.1873 * Cb - 0.4681 * Cr, Y + 1.8556 * Cb, a);\n" + "}\n" + "uint loadV210Word(ivec2 coord)\n" + "{\n" + " vec4 b = round(texelFetch(uPackedVideoInput, coord, 0) * 255.0);\n" + " return uint(b.r) | (uint(b.g) << 8) | (uint(b.b) << 16) | (uint(b.a) << 24);\n" + "}\n" + "float v210Component(uint word, int index)\n" + "{\n" + " return float((word >> uint(index * 10)) & 1023u);\n" + "}\n" + "vec4 decodeUyvy8(ivec2 outputCoord, ivec2 packedSize)\n" + "{\n" + " ivec2 packedCoord = ivec2(clamp(outputCoord.x / 2, 0, packedSize.x - 1), clamp(outputCoord.y, 0, packedSize.y - 1));\n" + " vec4 macroPixel = texelFetch(uPackedVideoInput, packedCoord, 0);\n" + " float ySample = (outputCoord.x & 1) != 0 ? 
macroPixel.a : macroPixel.g;\n" + " return rec709YCbCr2rgba(ySample, macroPixel.b, macroPixel.r, 1.0);\n" + "}\n" + "vec4 decodeV210(ivec2 outputCoord, ivec2 packedSize)\n" + "{\n" + " int group = outputCoord.x / 6;\n" + " int pixel = outputCoord.x - group * 6;\n" + " int wordBase = group * 4;\n" + " ivec2 rowBase = ivec2(wordBase, clamp(outputCoord.y, 0, packedSize.y - 1));\n" + " uint w0 = loadV210Word(ivec2(min(rowBase.x + 0, packedSize.x - 1), rowBase.y));\n" + " uint w1 = loadV210Word(ivec2(min(rowBase.x + 1, packedSize.x - 1), rowBase.y));\n" + " uint w2 = loadV210Word(ivec2(min(rowBase.x + 2, packedSize.x - 1), rowBase.y));\n" + " uint w3 = loadV210Word(ivec2(min(rowBase.x + 3, packedSize.x - 1), rowBase.y));\n" + " float y0 = v210Component(w0, 1);\n" + " float y1 = v210Component(w1, 0);\n" + " float y2 = v210Component(w1, 2);\n" + " float y3 = v210Component(w2, 1);\n" + " float y4 = v210Component(w3, 0);\n" + " float y5 = v210Component(w3, 2);\n" + " float cb0 = v210Component(w0, 0);\n" + " float cr0 = v210Component(w0, 2);\n" + " float cb2 = v210Component(w1, 1);\n" + " float cr2 = v210Component(w2, 0);\n" + " float cb4 = v210Component(w2, 2);\n" + " float cr4 = v210Component(w3, 1);\n" + " float ySample = pixel == 0 ? y0 : pixel == 1 ? y1 : pixel == 2 ? y2 : pixel == 3 ? y3 : pixel == 4 ? y4 : y5;\n" + " float cbSample = pixel < 2 ? cb0 : pixel < 4 ? cb2 : cb4;\n" + " float crSample = pixel < 2 ? cr0 : pixel < 4 ? 
cr2 : cr4;\n" + " return rec709YCbCr10_2rgba(ySample, cbSample, crSample, 1.0);\n" + "}\n" "void main()\n" "{\n" " vec2 correctedUv = vec2(vTexCoord.x, 1.0 - vTexCoord.y);\n" " ivec2 decodedSize = ivec2(max(uDecodedVideoResolution, vec2(1.0, 1.0)));\n" " ivec2 outputCoord = clamp(ivec2(correctedUv * vec2(decodedSize)), ivec2(0, 0), decodedSize - ivec2(1, 1));\n" " ivec2 packedSize = ivec2(max(uPackedVideoResolution, vec2(1.0, 1.0)));\n" - " ivec2 packedCoord = ivec2(clamp(outputCoord.x / 2, 0, packedSize.x - 1), clamp(outputCoord.y, 0, packedSize.y - 1));\n" - " vec4 macroPixel = texelFetch(uPackedVideoInput, packedCoord, 0);\n" - " float ySample = (outputCoord.x & 1) != 0 ? macroPixel.a : macroPixel.g;\n" - " fragColor = rec709YCbCr2rgba(ySample, macroPixel.b, macroPixel.r, 1.0);\n" + " fragColor = uInputPixelFormat == 1 ? decodeV210(outputCoord, packedSize) : decodeUyvy8(outputCoord, packedSize);\n" + "}\n"; + +const char* kOutputPackFragmentShaderSource = + "#version 430 core\n" + "layout(binding = 0) uniform sampler2D uOutputRgb;\n" + "uniform vec2 uOutputVideoResolution;\n" + "uniform float uActiveV210Words;\n" + "in vec2 vTexCoord;\n" + "layout(location = 0) out vec4 fragColor;\n" + "vec3 rgbAt(int x, int y)\n" + "{\n" + " ivec2 size = ivec2(max(uOutputVideoResolution, vec2(1.0, 1.0)));\n" + " return clamp(texelFetch(uOutputRgb, ivec2(clamp(x, 0, size.x - 1), clamp(y, 0, size.y - 1)), 0).rgb, vec3(0.0), vec3(1.0));\n" + "}\n" + "vec3 rgbToLegalYcbcr10(vec3 rgb)\n" + "{\n" + " float y = dot(rgb, vec3(0.2126, 0.7152, 0.0722));\n" + " float cb = (rgb.b - y) / 1.8556 + 0.5;\n" + " float cr = (rgb.r - y) / 1.5748 + 0.5;\n" + " return vec3(clamp(round(64.0 + y * 876.0), 64.0, 940.0), clamp(round(64.0 + cb * 896.0), 64.0, 960.0), clamp(round(64.0 + cr * 896.0), 64.0, 960.0));\n" + "}\n" + "uint makeWord(float a, float b, float c)\n" + "{\n" + " return (uint(a) & 1023u) | ((uint(b) & 1023u) << 10) | ((uint(c) & 1023u) << 20);\n" + "}\n" + "vec4 wordToBytes(uint 
word)\n" + "{\n" + " return vec4(float(word & 255u), float((word >> 8) & 255u), float((word >> 16) & 255u), float((word >> 24) & 255u)) / 255.0;\n" + "}\n" + "void main()\n" + "{\n" + " ivec2 outCoord = ivec2(gl_FragCoord.xy);\n" + " if (float(outCoord.x) >= uActiveV210Words)\n" + " {\n" + " fragColor = vec4(0.0);\n" + " return;\n" + " }\n" + " int group = outCoord.x / 4;\n" + " int wordIndex = outCoord.x - group * 4;\n" + " int pixelBase = group * 6;\n" + " int y = outCoord.y;\n" + " vec3 c0 = rgbToLegalYcbcr10(rgbAt(pixelBase + 0, y));\n" + " vec3 c1 = rgbToLegalYcbcr10(rgbAt(pixelBase + 1, y));\n" + " vec3 c2 = rgbToLegalYcbcr10(rgbAt(pixelBase + 2, y));\n" + " vec3 c3 = rgbToLegalYcbcr10(rgbAt(pixelBase + 3, y));\n" + " vec3 c4 = rgbToLegalYcbcr10(rgbAt(pixelBase + 4, y));\n" + " vec3 c5 = rgbToLegalYcbcr10(rgbAt(pixelBase + 5, y));\n" + " float cb0 = round((c0.y + c1.y) * 0.5);\n" + " float cr0 = round((c0.z + c1.z) * 0.5);\n" + " float cb2 = round((c2.y + c3.y) * 0.5);\n" + " float cr2 = round((c2.z + c3.z) * 0.5);\n" + " float cb4 = round((c4.y + c5.y) * 0.5);\n" + " float cr4 = round((c4.z + c5.z) * 0.5);\n" + " uint word = wordIndex == 0 ? makeWord(cb0, c0.x, cr0) : wordIndex == 1 ? makeWord(c1.x, cb2, c2.x) : wordIndex == 2 ? 
makeWord(cr2, c3.x, cb4) : makeWord(c4.x, cr4, c5.x);\n" + " fragColor = wordToBytes(word);\n" "}\n"; diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/GlShaderSources.h b/apps/LoopThroughWithOpenGLCompositing/gl/GlShaderSources.h index 2e67ec0..b7fcd2e 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/GlShaderSources.h +++ b/apps/LoopThroughWithOpenGLCompositing/gl/GlShaderSources.h @@ -2,3 +2,4 @@ extern const char* kFullscreenTriangleVertexShaderSource; extern const char* kDecodeFragmentShaderSource; +extern const char* kOutputPackFragmentShaderSource; diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLComposite.cpp b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLComposite.cpp index b349eb2..7fc80ef 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLComposite.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLComposite.cpp @@ -1,5 +1,4 @@ #include "DeckLinkDisplayMode.h" -#include "DeckLinkFrameTransfer.h" #include "DeckLinkSession.h" #include "OpenGLComposite.h" #include "GLExtensions.h" @@ -87,17 +86,14 @@ bool OpenGLComposite::InitDeckLink() MessageBoxA(NULL, initFailureReason.c_str(), title, MB_OK | MB_ICONERROR); return false; } + if (!mDeckLink->SelectPreferredFormats(videoModes, initFailureReason)) + goto error; if (! CheckOpenGLExtensions()) { initFailureReason = "OpenGL extension checks failed."; goto error; } - if (mDeckLink->InputOutputDimensionsDiffer()) - { - mRenderer->SetFastTransferAvailable(false); - OutputDebugStringA("Input/output dimensions differ; using regular OpenGL transfer fallback instead of fast transfer.\n"); - } if (! InitOpenGLState()) { @@ -115,16 +111,6 @@ bool OpenGLComposite::InitDeckLink() else resizeWindow(mDeckLink->OutputFrameWidth() / 2, mDeckLink->OutputFrameHeight() / 2); - if (mRenderer->FastTransferAvailable()) - { - // Initialize fast video frame transfers - if (! 
VideoFrameTransfer::initialize(mDeckLink->InputFrameWidth(), mDeckLink->InputFrameHeight(), mRenderer->CaptureTexture(), mRenderer->OutputTexture())) - { - MessageBox(NULL, _T("Cannot initialize video transfers."), _T("VideoFrameTransfer error."), MB_OK); - goto error; - } - } - if (!mDeckLink->ConfigureInput(this, hGLDC, hGLRC, videoModes.input, initFailureReason)) { goto error; @@ -222,6 +208,11 @@ bool OpenGLComposite::InitOpenGLState() MessageBoxA(NULL, compilerErrorMessage, "OpenGL decode shader failed to load or compile", MB_OK); return false; } + if (!mShaderPrograms->CompileOutputPackShader(sizeof(compilerErrorMessage), compilerErrorMessage)) + { + MessageBoxA(NULL, compilerErrorMessage, "OpenGL output pack shader failed to load or compile", MB_OK); + return false; + } if (!mShaderPrograms->CompileLayerPrograms(mDeckLink->InputFrameWidth(), mDeckLink->InputFrameHeight(), sizeof(compilerErrorMessage), compilerErrorMessage)) { @@ -233,7 +224,14 @@ bool OpenGLComposite::InitOpenGLState() mShaderPrograms->ResetTemporalHistoryState(); std::string rendererError; - if (!mRenderer->InitializeResources(mDeckLink->InputFrameWidth(), mDeckLink->InputFrameHeight(), mDeckLink->OutputFrameWidth(), mDeckLink->OutputFrameHeight(), rendererError)) + if (!mRenderer->InitializeResources( + mDeckLink->InputFrameWidth(), + mDeckLink->InputFrameHeight(), + mDeckLink->CaptureTextureWidth(), + mDeckLink->OutputFrameWidth(), + mDeckLink->OutputFrameHeight(), + mDeckLink->OutputPackTextureWidth(), + rendererError)) { MessageBoxA(NULL, rendererError.c_str(), "OpenGL initialization error.", MB_OK); return false; @@ -315,6 +313,8 @@ void OpenGLComposite::renderEffect() layerStates, mDeckLink->InputFrameWidth(), mDeckLink->InputFrameHeight(), + mDeckLink->CaptureTextureWidth(), + mDeckLink->InputPixelFormat(), historyCap, [this](const RuntimeRenderState& state, LayerProgram::TextBinding& textBinding, std::string& error) { return mShaderPrograms->UpdateTextBindingTexture(state, 
textBinding, error); @@ -392,11 +392,6 @@ void OpenGLComposite::resetTemporalHistoryState() bool OpenGLComposite::CheckOpenGLExtensions() { - mRenderer->SetFastTransferAvailable(VideoFrameTransfer::checkFastMemoryTransferAvailable()); - - if (!mRenderer->FastTransferAvailable()) - OutputDebugStringA("Fast memory transfer extension not available, using regular OpenGL transfer fallback instead\n"); - return true; } diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLDeckLinkBridge.cpp b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLDeckLinkBridge.cpp index 875c945..6f1ef76 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLDeckLinkBridge.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLDeckLinkBridge.cpp @@ -1,12 +1,9 @@ #include "OpenGLDeckLinkBridge.h" -#include "DeckLinkFrameTransfer.h" #include "DeckLinkSession.h" #include "OpenGLRenderer.h" #include "RuntimeHost.h" -#include "VideoFrameTransfer.h" -#include #include #include @@ -87,29 +84,20 @@ void OpenGLDeckLinkBridge::VideoFrameArrived(IDeckLinkVideoInputFrame* inputFram wglMakeCurrent(mHdc, mHglrc); // make OpenGL context current in this thread - if (mRenderer.FastTransferAvailable()) - { - CComQIPtr allocator(inputFrameBuffer); - if (!allocator || !allocator->transferFrame(videoPixels, mRenderer.CaptureTexture())) - OutputDebugStringA("Capture: transferFrame() failed\n"); + glPixelStorei(GL_UNPACK_ALIGNMENT, 4); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, mRenderer.TextureUploadBuffer()); + glBufferData(GL_PIXEL_UNPACK_BUFFER, textureSize, videoPixels, GL_DYNAMIC_DRAW); + glBindTexture(GL_TEXTURE_2D, mRenderer.CaptureTexture()); - allocator->waitForTransferComplete(videoPixels); - } + // NULL for last arg indicates use current GL_PIXEL_UNPACK_BUFFER target as texture data. 
+ if (mDeckLink.InputPixelFormat() == VideoIOPixelFormat::V210) + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mDeckLink.CaptureTextureWidth(), mDeckLink.InputFrameHeight(), GL_RGBA, GL_UNSIGNED_BYTE, NULL); else - { - // Use a straightforward texture buffer - glPixelStorei(GL_UNPACK_ALIGNMENT, 4); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, mRenderer.UnpinnedTextureBuffer()); - glBufferData(GL_PIXEL_UNPACK_BUFFER, textureSize, videoPixels, GL_DYNAMIC_DRAW); - glBindTexture(GL_TEXTURE_2D, mRenderer.CaptureTexture()); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mDeckLink.CaptureTextureWidth(), mDeckLink.InputFrameHeight(), GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); - // NULL for last arg indicates use current GL_PIXEL_UNPACK_BUFFER target as texture data - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mDeckLink.InputFrameWidth() / 2, mDeckLink.InputFrameHeight(), GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); - - glBindTexture(GL_TEXTURE_2D, 0); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - } + glBindTexture(GL_TEXTURE_2D, 0); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); wglMakeCurrent(NULL, NULL); @@ -135,17 +123,35 @@ void OpenGLDeckLinkBridge::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedF // Draw the effect output to the off-screen framebuffer. 
const auto renderStartTime = std::chrono::steady_clock::now(); - if (mRenderer.FastTransferAvailable()) - VideoFrameTransfer::beginTextureInUse(VideoFrameTransfer::GPUtoCPU); glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.CompositeFramebuffer()); mRenderEffect(); glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.CompositeFramebuffer()); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, mRenderer.OutputFramebuffer()); glBlitFramebuffer(0, 0, mDeckLink.InputFrameWidth(), mDeckLink.InputFrameHeight(), 0, 0, mDeckLink.OutputFrameWidth(), mDeckLink.OutputFrameHeight(), GL_COLOR_BUFFER_BIT, GL_LINEAR); glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.OutputFramebuffer()); + if (mDeckLink.OutputIsTenBit()) + { + glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.OutputPackFramebuffer()); + glViewport(0, 0, mDeckLink.OutputPackTextureWidth(), mDeckLink.OutputFrameHeight()); + glDisable(GL_SCISSOR_TEST); + glDisable(GL_BLEND); + glDisable(GL_DEPTH_TEST); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, mRenderer.OutputTexture()); + glBindVertexArray(mRenderer.FullscreenVertexArray()); + glUseProgram(mRenderer.OutputPackProgram()); + const GLint outputResolutionLocation = glGetUniformLocation(mRenderer.OutputPackProgram(), "uOutputVideoResolution"); + const GLint activeWordsLocation = glGetUniformLocation(mRenderer.OutputPackProgram(), "uActiveV210Words"); + if (outputResolutionLocation >= 0) + glUniform2f(outputResolutionLocation, static_cast(mDeckLink.OutputFrameWidth()), static_cast(mDeckLink.OutputFrameHeight())); + if (activeWordsLocation >= 0) + glUniform1f(activeWordsLocation, static_cast(ActiveV210WordsForWidth(mDeckLink.OutputFrameWidth()))); + glDrawArrays(GL_TRIANGLES, 0, 3); + glUseProgram(0); + glBindVertexArray(0); + glBindTexture(GL_TEXTURE_2D, 0); + } glFlush(); - if (mRenderer.FastTransferAvailable()) - VideoFrameTransfer::endTextureInUse(VideoFrameTransfer::GPUtoCPU); const auto renderEndTime = std::chrono::steady_clock::now(); const double frameBudgetMilliseconds = 
mDeckLink.FrameBudgetMilliseconds(); const double renderMilliseconds = std::chrono::duration_cast>(renderEndTime - renderStartTime).count(); @@ -171,23 +177,19 @@ void OpenGLDeckLinkBridge::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedF void* pFrame; outputVideoFrameBuffer->GetBytes(&pFrame); - if (mRenderer.FastTransferAvailable()) + glPixelStorei(GL_PACK_ALIGNMENT, 4); + glPixelStorei(GL_PACK_ROW_LENGTH, 0); + if (mDeckLink.OutputIsTenBit()) { - if (!mDeckLink.TransferPlayoutFrame(pFrame, mRenderer.OutputTexture())) - OutputDebugStringA("Playback: transferFrame() failed\n"); - - // Wait for transfer to system memory to complete - mDeckLink.WaitForPlayoutTransferComplete(pFrame); - mPaint(); + glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputPackFramebuffer()); + glReadPixels(0, 0, mDeckLink.OutputPackTextureWidth(), mDeckLink.OutputFrameHeight(), GL_RGBA, GL_UNSIGNED_BYTE, pFrame); } else { - glPixelStorei(GL_PACK_ALIGNMENT, 4); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); glBindFramebuffer(GL_READ_FRAMEBUFFER, mRenderer.OutputFramebuffer()); glReadPixels(0, 0, mDeckLink.OutputFrameWidth(), mDeckLink.OutputFrameHeight(), GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, pFrame); - mPaint(); } + mPaint(); outputVideoFrameBuffer->EndAccess(bmdBufferAccessWrite); outputVideoFrameBuffer->Release(); diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderPass.cpp b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderPass.cpp index 10997ba..8c86fb9 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderPass.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderPass.cpp @@ -1,7 +1,6 @@ #include "OpenGLRenderPass.h" #include "GlRenderConstants.h" -#include "VideoFrameTransfer.h" OpenGLRenderPass::OpenGLRenderPass(OpenGLRenderer& renderer) : mRenderer(renderer) @@ -13,22 +12,18 @@ void OpenGLRenderPass::Render( const std::vector& layerStates, unsigned inputFrameWidth, unsigned inputFrameHeight, + unsigned captureTextureWidth, + 
VideoIOPixelFormat inputPixelFormat, unsigned historyCap, const TextBindingUpdater& updateTextBinding, const GlobalParamsUpdater& updateGlobalParams) { - if (hasInputSource && mRenderer.FastTransferAvailable()) - { - // Signal that the capture texture is about to be sampled into the composite framebuffer. - VideoFrameTransfer::beginTextureInUse(VideoFrameTransfer::CPUtoGPU); - } - glDisable(GL_SCISSOR_TEST); glDisable(GL_BLEND); glDisable(GL_DEPTH_TEST); if (hasInputSource) { - RenderDecodePass(inputFrameWidth, inputFrameHeight); + RenderDecodePass(inputFrameWidth, inputFrameHeight, captureTextureWidth, inputPixelFormat); } else { @@ -72,12 +67,9 @@ void OpenGLRenderPass::Render( } mRenderer.TemporalHistory().PushSourceFramebuffer(mRenderer.DecodeFramebuffer(), inputFrameWidth, inputFrameHeight); - - if (hasInputSource && mRenderer.FastTransferAvailable()) - VideoFrameTransfer::endTextureInUse(VideoFrameTransfer::CPUtoGPU); } -void OpenGLRenderPass::RenderDecodePass(unsigned inputFrameWidth, unsigned inputFrameHeight) +void OpenGLRenderPass::RenderDecodePass(unsigned inputFrameWidth, unsigned inputFrameHeight, unsigned captureTextureWidth, VideoIOPixelFormat inputPixelFormat) { glBindFramebuffer(GL_FRAMEBUFFER, mRenderer.DecodeFramebuffer()); glViewport(0, 0, inputFrameWidth, inputFrameHeight); @@ -89,10 +81,13 @@ void OpenGLRenderPass::RenderDecodePass(unsigned inputFrameWidth, unsigned input const GLint packedResolutionLocation = glGetUniformLocation(mRenderer.DecodeProgram(), "uPackedVideoResolution"); const GLint decodedResolutionLocation = glGetUniformLocation(mRenderer.DecodeProgram(), "uDecodedVideoResolution"); + const GLint inputPixelFormatLocation = glGetUniformLocation(mRenderer.DecodeProgram(), "uInputPixelFormat"); if (packedResolutionLocation >= 0) - glUniform2f(packedResolutionLocation, static_cast(inputFrameWidth / 2), static_cast(inputFrameHeight)); + glUniform2f(packedResolutionLocation, static_cast(captureTextureWidth), 
static_cast(inputFrameHeight)); if (decodedResolutionLocation >= 0) glUniform2f(decodedResolutionLocation, static_cast(inputFrameWidth), static_cast(inputFrameHeight)); + if (inputPixelFormatLocation >= 0) + glUniform1i(inputPixelFormatLocation, inputPixelFormat == VideoIOPixelFormat::V210 ? 1 : 0); glDrawArrays(GL_TRIANGLES, 0, 3); diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderPass.h b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderPass.h index 97ba462..43beb39 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderPass.h +++ b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderPass.h @@ -2,6 +2,7 @@ #include "OpenGLRenderer.h" #include "ShaderTypes.h" +#include "VideoIOFormat.h" #include #include @@ -21,12 +22,14 @@ public: const std::vector& layerStates, unsigned inputFrameWidth, unsigned inputFrameHeight, + unsigned captureTextureWidth, + VideoIOPixelFormat inputPixelFormat, unsigned historyCap, const TextBindingUpdater& updateTextBinding, const GlobalParamsUpdater& updateGlobalParams); private: - void RenderDecodePass(unsigned inputFrameWidth, unsigned inputFrameHeight); + void RenderDecodePass(unsigned inputFrameWidth, unsigned inputFrameHeight, unsigned captureTextureWidth, VideoIOPixelFormat inputPixelFormat); void RenderShaderProgram( GLuint sourceTexture, GLuint destinationFrameBuffer, diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderer.cpp b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderer.cpp index 75ceb7d..19fbd8e 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderer.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderer.cpp @@ -4,43 +4,52 @@ namespace { - void ConfigureFrameTexture(unsigned width, unsigned height) + void ConfigureByteFrameTexture(unsigned width, unsigned height) { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, 
GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + } + + void ConfigureDisplayFrameTexture(unsigned width, unsigned height) + { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, width, height, 0, GL_RGBA, GL_FLOAT, NULL); } } -bool OpenGLRenderer::InitializeResources(unsigned inputFrameWidth, unsigned inputFrameHeight, unsigned outputFrameWidth, unsigned outputFrameHeight, std::string& error) +bool OpenGLRenderer::InitializeResources(unsigned inputFrameWidth, unsigned inputFrameHeight, unsigned captureTextureWidth, unsigned outputFrameWidth, unsigned outputFrameHeight, unsigned outputPackTextureWidth, std::string& error) { glClearColor(0.0f, 0.0f, 0.0f, 0.5f); glDisable(GL_DEPTH_TEST); - if (!mFastTransferExtensionAvailable) - glGenBuffers(1, &mUnpinnedTextureBuffer); + glGenBuffers(1, &mTextureUploadBuffer); glGenTextures(1, &mCaptureTexture); glBindTexture(GL_TEXTURE_2D, mCaptureTexture); - ConfigureFrameTexture(inputFrameWidth / 2, inputFrameHeight); + ConfigureByteFrameTexture(captureTextureWidth, inputFrameHeight); glBindTexture(GL_TEXTURE_2D, 0); glGenTextures(1, &mDecodedTexture); glBindTexture(GL_TEXTURE_2D, mDecodedTexture); - ConfigureFrameTexture(inputFrameWidth, inputFrameHeight); + ConfigureDisplayFrameTexture(inputFrameWidth, inputFrameHeight); glBindTexture(GL_TEXTURE_2D, 0); glGenTextures(1, &mLayerTempTexture); glBindTexture(GL_TEXTURE_2D, mLayerTempTexture); - ConfigureFrameTexture(inputFrameWidth, inputFrameHeight); + 
ConfigureDisplayFrameTexture(inputFrameWidth, inputFrameHeight); glBindTexture(GL_TEXTURE_2D, 0); glGenFramebuffers(1, &mDecodeFrameBuf); glGenFramebuffers(1, &mLayerTempFrameBuf); glGenFramebuffers(1, &mIdFrameBuf); glGenFramebuffers(1, &mOutputFrameBuf); + glGenFramebuffers(1, &mOutputPackFrameBuf); glGenRenderbuffers(1, &mIdColorBuf); glGenRenderbuffers(1, &mIdDepthBuf); glGenVertexArrays(1, &mFullscreenVAO); @@ -65,7 +74,7 @@ bool OpenGLRenderer::InitializeResources(unsigned inputFrameWidth, unsigned inpu glBindFramebuffer(GL_FRAMEBUFFER, mIdFrameBuf); glGenTextures(1, &mFBOTexture); glBindTexture(GL_TEXTURE_2D, mFBOTexture); - ConfigureFrameTexture(inputFrameWidth, inputFrameHeight); + ConfigureDisplayFrameTexture(inputFrameWidth, inputFrameHeight); glBindRenderbuffer(GL_RENDERBUFFER, mIdDepthBuf); glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24, inputFrameWidth, inputFrameHeight); @@ -79,7 +88,7 @@ bool OpenGLRenderer::InitializeResources(unsigned inputFrameWidth, unsigned inpu glGenTextures(1, &mOutputTexture); glBindTexture(GL_TEXTURE_2D, mOutputTexture); - ConfigureFrameTexture(outputFrameWidth, outputFrameHeight); + ConfigureDisplayFrameTexture(outputFrameWidth, outputFrameHeight); glBindFramebuffer(GL_FRAMEBUFFER, mOutputFrameBuf); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mOutputTexture, 0); @@ -89,6 +98,18 @@ bool OpenGLRenderer::InitializeResources(unsigned inputFrameWidth, unsigned inpu return false; } + glGenTextures(1, &mOutputPackTexture); + glBindTexture(GL_TEXTURE_2D, mOutputPackTexture); + ConfigureByteFrameTexture(outputPackTextureWidth, outputFrameHeight); + + glBindFramebuffer(GL_FRAMEBUFFER, mOutputPackFrameBuf); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mOutputPackTexture, 0); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) + { + error = "Cannot initialize output pack framebuffer."; + return false; + } + glBindTexture(GL_TEXTURE_2D, 
0); glBindRenderbuffer(GL_RENDERBUFFER, 0); glBindFramebuffer(GL_FRAMEBUFFER, 0); @@ -109,6 +130,13 @@ void OpenGLRenderer::SetDecodeShaderProgram(GLuint program, GLuint vertexShader, mDecodeFragmentShader = fragmentShader; } +void OpenGLRenderer::SetOutputPackShaderProgram(GLuint program, GLuint vertexShader, GLuint fragmentShader) +{ + mOutputPackProgram = program; + mOutputPackVertexShader = vertexShader; + mOutputPackFragmentShader = fragmentShader; +} + void OpenGLRenderer::ResizeView(int width, int height) { mViewWidth = width; @@ -166,6 +194,8 @@ void OpenGLRenderer::DestroyResources() glDeleteFramebuffers(1, &mIdFrameBuf); if (mOutputFrameBuf != 0) glDeleteFramebuffers(1, &mOutputFrameBuf); + if (mOutputPackFrameBuf != 0) + glDeleteFramebuffers(1, &mOutputPackFrameBuf); if (mIdColorBuf != 0) glDeleteRenderbuffers(1, &mIdColorBuf); if (mIdDepthBuf != 0) @@ -180,8 +210,10 @@ void OpenGLRenderer::DestroyResources() glDeleteTextures(1, &mFBOTexture); if (mOutputTexture != 0) glDeleteTextures(1, &mOutputTexture); - if (mUnpinnedTextureBuffer != 0) - glDeleteBuffers(1, &mUnpinnedTextureBuffer); + if (mOutputPackTexture != 0) + glDeleteTextures(1, &mOutputPackTexture); + if (mTextureUploadBuffer != 0) + glDeleteBuffers(1, &mTextureUploadBuffer); mFullscreenVAO = 0; mGlobalParamsUBO = 0; @@ -189,6 +221,7 @@ void OpenGLRenderer::DestroyResources() mLayerTempFrameBuf = 0; mIdFrameBuf = 0; mOutputFrameBuf = 0; + mOutputPackFrameBuf = 0; mIdColorBuf = 0; mIdDepthBuf = 0; mCaptureTexture = 0; @@ -196,12 +229,14 @@ void OpenGLRenderer::DestroyResources() mLayerTempTexture = 0; mFBOTexture = 0; mOutputTexture = 0; - mUnpinnedTextureBuffer = 0; + mOutputPackTexture = 0; + mTextureUploadBuffer = 0; mGlobalParamsUBOSize = 0; mTemporalHistory.DestroyResources(); DestroyLayerPrograms(); DestroyDecodeShaderProgram(); + DestroyOutputPackShaderProgram(); } void OpenGLRenderer::DestroySingleLayerProgram(LayerProgram& layerProgram) @@ -272,3 +307,24 @@ void 
OpenGLRenderer::DestroyDecodeShaderProgram() mDecodeVertexShader = 0; } } + +void OpenGLRenderer::DestroyOutputPackShaderProgram() +{ + if (mOutputPackProgram != 0) + { + glDeleteProgram(mOutputPackProgram); + mOutputPackProgram = 0; + } + + if (mOutputPackFragmentShader != 0) + { + glDeleteShader(mOutputPackFragmentShader); + mOutputPackFragmentShader = 0; + } + + if (mOutputPackVertexShader != 0) + { + glDeleteShader(mOutputPackVertexShader); + mOutputPackVertexShader = 0; + } +} diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderer.h b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderer.h index 53d0cec..357fa3c 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderer.h +++ b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLRenderer.h @@ -44,21 +44,22 @@ public: std::vector textBindings; }; - bool FastTransferAvailable() const { return mFastTransferExtensionAvailable; } - void SetFastTransferAvailable(bool available) { mFastTransferExtensionAvailable = available; } GLuint CaptureTexture() const { return mCaptureTexture; } GLuint DecodedTexture() const { return mDecodedTexture; } GLuint LayerTempTexture() const { return mLayerTempTexture; } GLuint CompositeTexture() const { return mFBOTexture; } GLuint OutputTexture() const { return mOutputTexture; } - GLuint UnpinnedTextureBuffer() const { return mUnpinnedTextureBuffer; } + GLuint OutputPackTexture() const { return mOutputPackTexture; } + GLuint TextureUploadBuffer() const { return mTextureUploadBuffer; } GLuint DecodeFramebuffer() const { return mDecodeFrameBuf; } GLuint LayerTempFramebuffer() const { return mLayerTempFrameBuf; } GLuint CompositeFramebuffer() const { return mIdFrameBuf; } GLuint OutputFramebuffer() const { return mOutputFrameBuf; } + GLuint OutputPackFramebuffer() const { return mOutputPackFrameBuf; } GLuint FullscreenVertexArray() const { return mFullscreenVAO; } GLuint GlobalParamsUBO() const { return mGlobalParamsUBO; } GLuint DecodeProgram() const { return 
mDecodeProgram; } + GLuint OutputPackProgram() const { return mOutputPackProgram; } GLsizeiptr GlobalParamsUBOSize() const { return mGlobalParamsUBOSize; } void SetGlobalParamsUBOSize(GLsizeiptr size) { mGlobalParamsUBOSize = size; } void ReplaceLayerPrograms(std::vector& newPrograms) { mLayerPrograms.swap(newPrograms); } @@ -67,26 +68,29 @@ public: TemporalHistoryBuffers& TemporalHistory() { return mTemporalHistory; } const TemporalHistoryBuffers& TemporalHistory() const { return mTemporalHistory; } void SetDecodeShaderProgram(GLuint program, GLuint vertexShader, GLuint fragmentShader); - bool InitializeResources(unsigned inputFrameWidth, unsigned inputFrameHeight, unsigned outputFrameWidth, unsigned outputFrameHeight, std::string& error); + void SetOutputPackShaderProgram(GLuint program, GLuint vertexShader, GLuint fragmentShader); + bool InitializeResources(unsigned inputFrameWidth, unsigned inputFrameHeight, unsigned captureTextureWidth, unsigned outputFrameWidth, unsigned outputFrameHeight, unsigned outputPackTextureWidth, std::string& error); void ResizeView(int width, int height); void PresentToWindow(HDC hdc, unsigned outputFrameWidth, unsigned outputFrameHeight); void DestroyResources(); void DestroySingleLayerProgram(LayerProgram& layerProgram); void DestroyLayerPrograms(); void DestroyDecodeShaderProgram(); + void DestroyOutputPackShaderProgram(); private: - bool mFastTransferExtensionAvailable = false; GLuint mCaptureTexture = 0; GLuint mDecodedTexture = 0; GLuint mLayerTempTexture = 0; GLuint mFBOTexture = 0; GLuint mOutputTexture = 0; - GLuint mUnpinnedTextureBuffer = 0; + GLuint mOutputPackTexture = 0; + GLuint mTextureUploadBuffer = 0; GLuint mDecodeFrameBuf = 0; GLuint mLayerTempFrameBuf = 0; GLuint mIdFrameBuf = 0; GLuint mOutputFrameBuf = 0; + GLuint mOutputPackFrameBuf = 0; GLuint mIdColorBuf = 0; GLuint mIdDepthBuf = 0; GLuint mFullscreenVAO = 0; @@ -94,6 +98,9 @@ private: GLuint mDecodeProgram = 0; GLuint mDecodeVertexShader = 0; GLuint 
mDecodeFragmentShader = 0; + GLuint mOutputPackProgram = 0; + GLuint mOutputPackVertexShader = 0; + GLuint mOutputPackFragmentShader = 0; GLsizeiptr mGlobalParamsUBOSize = 0; int mViewWidth = 0; int mViewHeight = 0; diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLShaderPrograms.cpp b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLShaderPrograms.cpp index 2b50d14..b9dd3b7 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLShaderPrograms.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLShaderPrograms.cpp @@ -115,6 +115,11 @@ bool OpenGLShaderPrograms::CompileDecodeShader(int errorMessageSize, char* error return mCompiler.CompileDecodeShader(errorMessageSize, errorMessage); } +bool OpenGLShaderPrograms::CompileOutputPackShader(int errorMessageSize, char* errorMessage) +{ + return mCompiler.CompileOutputPackShader(errorMessageSize, errorMessage); +} + void OpenGLShaderPrograms::DestroySingleLayerProgram(LayerProgram& layerProgram) { mRenderer.DestroySingleLayerProgram(layerProgram); diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLShaderPrograms.h b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLShaderPrograms.h index a69611d..dcb9dfb 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLShaderPrograms.h +++ b/apps/LoopThroughWithOpenGLCompositing/gl/OpenGLShaderPrograms.h @@ -20,6 +20,7 @@ public: bool CompileLayerPrograms(unsigned inputFrameWidth, unsigned inputFrameHeight, int errorMessageSize, char* errorMessage); bool CommitPreparedLayerPrograms(const PreparedShaderBuild& preparedBuild, unsigned inputFrameWidth, unsigned inputFrameHeight, int errorMessageSize, char* errorMessage); bool CompileDecodeShader(int errorMessageSize, char* errorMessage); + bool CompileOutputPackShader(int errorMessageSize, char* errorMessage); void DestroyLayerPrograms(); void DestroySingleLayerProgram(LayerProgram& layerProgram); void DestroyDecodeShaderProgram(); diff --git 
a/apps/LoopThroughWithOpenGLCompositing/gl/ShaderProgramCompiler.cpp b/apps/LoopThroughWithOpenGLCompositing/gl/ShaderProgramCompiler.cpp index 14caf29..7b62f8d 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/ShaderProgramCompiler.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/gl/ShaderProgramCompiler.cpp @@ -192,3 +192,53 @@ bool ShaderProgramCompiler::CompileDecodeShader(int errorMessageSize, char* erro mRenderer.SetDecodeShaderProgram(newProgram.release(), newVertexShader.release(), newFragmentShader.release()); return true; } + +bool ShaderProgramCompiler::CompileOutputPackShader(int errorMessageSize, char* errorMessage) +{ + GLsizei errorBufferSize = 0; + GLint compileResult = GL_FALSE; + GLint linkResult = GL_FALSE; + const char* vertexSource = kFullscreenTriangleVertexShaderSource; + const char* fragmentSource = kOutputPackFragmentShaderSource; + + ScopedGlShader newVertexShader(glCreateShader(GL_VERTEX_SHADER)); + glShaderSource(newVertexShader.get(), 1, (const GLchar**)&vertexSource, NULL); + glCompileShader(newVertexShader.get()); + glGetShaderiv(newVertexShader.get(), GL_COMPILE_STATUS, &compileResult); + if (compileResult == GL_FALSE) + { + glGetShaderInfoLog(newVertexShader.get(), errorMessageSize, &errorBufferSize, errorMessage); + return false; + } + + ScopedGlShader newFragmentShader(glCreateShader(GL_FRAGMENT_SHADER)); + glShaderSource(newFragmentShader.get(), 1, (const GLchar**)&fragmentSource, NULL); + glCompileShader(newFragmentShader.get()); + glGetShaderiv(newFragmentShader.get(), GL_COMPILE_STATUS, &compileResult); + if (compileResult == GL_FALSE) + { + glGetShaderInfoLog(newFragmentShader.get(), errorMessageSize, &errorBufferSize, errorMessage); + return false; + } + + ScopedGlProgram newProgram(glCreateProgram()); + glAttachShader(newProgram.get(), newVertexShader.get()); + glAttachShader(newProgram.get(), newFragmentShader.get()); + glLinkProgram(newProgram.get()); + glGetProgramiv(newProgram.get(), GL_LINK_STATUS, &linkResult); + 
if (linkResult == GL_FALSE) + { + glGetProgramInfoLog(newProgram.get(), errorMessageSize, &errorBufferSize, errorMessage); + return false; + } + + glUseProgram(newProgram.get()); + const GLint outputSamplerLocation = glGetUniformLocation(newProgram.get(), "uOutputRgb"); + if (outputSamplerLocation >= 0) + glUniform1i(outputSamplerLocation, 0); + glUseProgram(0); + + mRenderer.DestroyOutputPackShaderProgram(); + mRenderer.SetOutputPackShaderProgram(newProgram.release(), newVertexShader.release(), newFragmentShader.release()); + return true; +} diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/ShaderProgramCompiler.h b/apps/LoopThroughWithOpenGLCompositing/gl/ShaderProgramCompiler.h index 8083a11..1ed388b 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/ShaderProgramCompiler.h +++ b/apps/LoopThroughWithOpenGLCompositing/gl/ShaderProgramCompiler.h @@ -16,6 +16,7 @@ public: bool CompileLayerProgram(const RuntimeRenderState& state, LayerProgram& layerProgram, int errorMessageSize, char* errorMessage); bool CompilePreparedLayerProgram(const RuntimeRenderState& state, const std::string& fragmentShaderSource, LayerProgram& layerProgram, int errorMessageSize, char* errorMessage); bool CompileDecodeShader(int errorMessageSize, char* errorMessage); + bool CompileOutputPackShader(int errorMessageSize, char* errorMessage); private: OpenGLRenderer& mRenderer; diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/TemporalHistoryBuffers.cpp b/apps/LoopThroughWithOpenGLCompositing/gl/TemporalHistoryBuffers.cpp index 246fce6..c080630 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/TemporalHistoryBuffers.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/gl/TemporalHistoryBuffers.cpp @@ -105,7 +105,7 @@ bool TemporalHistoryBuffers::CreateRing(Ring& ring, unsigned effectiveLength, Te glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, 
GL_CLAMP_TO_EDGE); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, frameWidth, frameHeight, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, frameWidth, frameHeight, 0, GL_RGBA, GL_FLOAT, NULL); glGenFramebuffers(1, &slot.framebuffer); glBindFramebuffer(GL_FRAMEBUFFER, slot.framebuffer); diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/TextureAssetLoader.h b/apps/LoopThroughWithOpenGLCompositing/gl/TextureAssetLoader.h index 1334b27..5ad385c 100644 --- a/apps/LoopThroughWithOpenGLCompositing/gl/TextureAssetLoader.h +++ b/apps/LoopThroughWithOpenGLCompositing/gl/TextureAssetLoader.h @@ -1,5 +1,6 @@ #pragma once +#include "GLExtensions.h" #include "ShaderTypes.h" #include diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/VideoFrameTransfer.cpp b/apps/LoopThroughWithOpenGLCompositing/gl/VideoFrameTransfer.cpp deleted file mode 100644 index 697099f..0000000 --- a/apps/LoopThroughWithOpenGLCompositing/gl/VideoFrameTransfer.cpp +++ /dev/null @@ -1,377 +0,0 @@ -/* -LICENSE-START- - ** Copyright (c) 2012 Blackmagic Design - ** - ** Permission is hereby granted, free of charge, to any person or organization - ** obtaining a copy of the software and accompanying documentation (the - ** "Software") to use, reproduce, display, distribute, sub-license, execute, - ** and transmit the Software, and to prepare derivative works of the Software, - ** and to permit third-parties to whom the Software is furnished to do so, in - ** accordance with: - ** - ** (1) if the Software is obtained from Blackmagic Design, the End User License - ** Agreement for the Software Development Kit ("EULA") available at - ** https://www.blackmagicdesign.com/EULA/DeckLinkSDK; or - ** - ** (2) if the Software is obtained from any third party, such licensing terms - ** as notified by that third party, - ** - ** and all subject to the following: - ** - ** (3) the copyright notices in the Software and this entire statement, - ** including the above license 
grant, this restriction and the following - ** disclaimer, must be included in all copies of the Software, in whole or in - ** part, and all derivative works of the Software, unless such copies or - ** derivative works are solely in the form of machine-executable object code - ** generated by a source language processor. - ** - ** (4) THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - ** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - ** FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT - ** SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE - ** FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, - ** ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - ** DEALINGS IN THE SOFTWARE. - ** - ** A copy of the Software is available free of charge at - ** https://www.blackmagicdesign.com/desktopvideo_sdk under the EULA. - ** - ** -LICENSE-END- - */ - -#include "VideoFrameTransfer.h" -#include "NativeHandles.h" - - -#define DVP_CHECK(cmd) { \ - DVPStatus hr = (cmd); \ - if (DVP_STATUS_OK != hr) { \ - OutputDebugStringA( #cmd " failed\n" ); \ - ExitProcess(hr); \ - } \ -} - - -// Initialise static members -bool VideoFrameTransfer::mInitialized = false; -bool VideoFrameTransfer::mUseDvp = false; -unsigned VideoFrameTransfer::mWidth = 0; -unsigned VideoFrameTransfer::mHeight = 0; -GLuint VideoFrameTransfer::mCaptureTexture = 0; - -// NVIDIA specific static members -DVPBufferHandle VideoFrameTransfer::mDvpCaptureTextureHandle = 0; -DVPBufferHandle VideoFrameTransfer::mDvpPlaybackTextureHandle = 0; -uint32_t VideoFrameTransfer::mBufferAddrAlignment = 0; -uint32_t VideoFrameTransfer::mBufferGpuStrideAlignment = 0; -uint32_t VideoFrameTransfer::mSemaphoreAddrAlignment = 0; -uint32_t VideoFrameTransfer::mSemaphoreAllocSize = 0; -uint32_t VideoFrameTransfer::mSemaphorePayloadOffset = 0; -uint32_t 
VideoFrameTransfer::mSemaphorePayloadSize = 0; - - -bool VideoFrameTransfer::isNvidiaDvpAvailable() -{ - // Look for supported graphics boards - const GLubyte* renderer = glGetString(GL_RENDERER); - if (renderer == NULL) - return false; - - bool hasDvp = (strstr((char*)renderer, "Quadro") != NULL); - return hasDvp; -} - -bool VideoFrameTransfer::isAMDPinnedMemoryAvailable() -{ - // GL_AMD_pinned_memory presence indicates GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD buffer target is supported - const GLubyte* strExt = glGetString(GL_EXTENSIONS); - if (strExt == NULL) - { - // In a core profile context GL_EXTENSIONS is no longer queryable via glGetString(). - // Treat this as "extension unavailable" for now; the fast-transfer path is optional. - return false; - } - - bool hasAMDPinned = (strstr((char*)strExt, "GL_AMD_pinned_memory") != NULL); - return hasAMDPinned; -} - -bool VideoFrameTransfer::checkFastMemoryTransferAvailable() -{ - return (isNvidiaDvpAvailable() || isAMDPinnedMemoryAvailable()); -} - -bool VideoFrameTransfer::initialize(unsigned width, unsigned height, GLuint captureTexture, GLuint playbackTexture) -{ - if (mInitialized) - return false; - - bool hasDvp = isNvidiaDvpAvailable(); - bool hasAMDPinned = isAMDPinnedMemoryAvailable(); - - if (!hasDvp && !hasAMDPinned) - return false; - - mUseDvp = hasDvp; - mWidth = width; - mHeight = height; - mCaptureTexture = captureTexture; - - if (! 
initializeMemoryLocking(mWidth * mHeight * 4)) // BGRA uses 4 bytes per pixel - return false; - - if (mUseDvp) - { - // DVP initialisation - DVP_CHECK(dvpInitGLContext(DVP_DEVICE_FLAGS_SHARE_APP_CONTEXT)); - DVP_CHECK(dvpGetRequiredConstantsGLCtx( &mBufferAddrAlignment, &mBufferGpuStrideAlignment, - &mSemaphoreAddrAlignment, &mSemaphoreAllocSize, - &mSemaphorePayloadOffset, &mSemaphorePayloadSize)); - - // Register textures with DVP - DVP_CHECK(dvpCreateGPUTextureGL(captureTexture, &mDvpCaptureTextureHandle)); - DVP_CHECK(dvpCreateGPUTextureGL(playbackTexture, &mDvpPlaybackTextureHandle)); - } - - mInitialized = true; - - return true; -} - -bool VideoFrameTransfer::initializeMemoryLocking(unsigned memSize) -{ - // Increase the process working set size to allow pinning of memory. - static SIZE_T dwMin = 0, dwMax = 0; - UniqueHandle processHandle(OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_SET_QUOTA, FALSE, GetCurrentProcessId())); - if (!processHandle.valid()) - return false; - - // Retrieve the working set size of the process. - if (!dwMin && !GetProcessWorkingSetSize(processHandle.get(), &dwMin, &dwMax)) - return false; - - // Allow for 80 frames to be locked - BOOL res = SetProcessWorkingSetSize(processHandle.get(), memSize * 80 + dwMin, memSize * 80 + (dwMax-dwMin)); - if (!res) - return false; - - return true; -} - -// SyncInfo sets up a semaphore which is shared between the GPU and CPU and used to -// synchronise access to DVP buffers. 
-struct SyncInfo -{ - SyncInfo(uint32_t semaphoreAllocSize, uint32_t semaphoreAddrAlignment); - ~SyncInfo(); - - volatile uint32_t* mSem; - volatile uint32_t mReleaseValue; - volatile uint32_t mAcquireValue; - DVPSyncObjectHandle mDvpSync; -}; - -SyncInfo::SyncInfo(uint32_t semaphoreAllocSize, uint32_t semaphoreAddrAlignment) -{ - mSem = (uint32_t*)_aligned_malloc(semaphoreAllocSize, semaphoreAddrAlignment); - - // Initialise - mSem[0] = 0; - mReleaseValue = 0; - mAcquireValue = 0; - - // Setup DVP sync object and import it - DVPSyncObjectDesc syncObjectDesc; - syncObjectDesc.externalClientWaitFunc = NULL; - syncObjectDesc.sem = (uint32_t*)mSem; - - DVP_CHECK(dvpImportSyncObject(&syncObjectDesc, &mDvpSync)); -} - -SyncInfo::~SyncInfo() -{ - DVP_CHECK(dvpFreeSyncObject(mDvpSync)); - _aligned_free((void*)mSem); -} - -VideoFrameTransfer::VideoFrameTransfer(unsigned long memSize, void* address, Direction direction) : - mBuffer(address), - mMemSize(memSize), - mDirection(direction), - mExtSync(NULL), - mGpuSync(NULL), - mDvpSysMemHandle(0), - mBufferHandle(0) -{ - if (mUseDvp) - { - // Pin the memory - if (! 
VirtualLock(mBuffer, mMemSize)) - throw std::runtime_error("Error pinning memory with VirtualLock"); - - // Create necessary sysmem and gpu sync objects - mExtSync = new SyncInfo(mSemaphoreAllocSize, mSemaphoreAddrAlignment); - mGpuSync = new SyncInfo(mSemaphoreAllocSize, mSemaphoreAddrAlignment); - - // Register system memory buffers with DVP - DVPSysmemBufferDesc sysMemBuffersDesc; - sysMemBuffersDesc.width = mWidth; - sysMemBuffersDesc.height = mHeight; - sysMemBuffersDesc.stride = mWidth * 4; - sysMemBuffersDesc.format = DVP_BGRA; - sysMemBuffersDesc.type = DVP_UNSIGNED_BYTE; - sysMemBuffersDesc.size = mMemSize; - sysMemBuffersDesc.bufAddr = mBuffer; - - if (mDirection == CPUtoGPU) - { - // A UYVY 4:2:2 frame is transferred to the GPU, rather than RGB 4:4:4, so width is halved - sysMemBuffersDesc.width /= 2; - sysMemBuffersDesc.stride /= 2; - } - - DVP_CHECK(dvpCreateBuffer(&sysMemBuffersDesc, &mDvpSysMemHandle)); - DVP_CHECK(dvpBindToGLCtx(mDvpSysMemHandle)); - } - else - { - // Create an OpenGL buffer handle to use for pinned memory - GLuint bufferHandle; - glGenBuffers(1, &bufferHandle); - - // Pin memory by binding buffer to special AMD target. - glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, bufferHandle); - - // glBufferData() sets up the address so any OpenGL operation on this buffer will use system memory directly - // (assumes address is aligned to 4k boundary). 
- glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, mMemSize, address, GL_STREAM_DRAW); - GLenum result = glGetError(); - if (result != GL_NO_ERROR) - { - throw std::runtime_error("Error pinning memory with glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, ...)"); - } - glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0); // Unbind buffer to target - - mBufferHandle = bufferHandle; - } -} - -VideoFrameTransfer::~VideoFrameTransfer() -{ - if (mUseDvp) - { - DVP_CHECK(dvpUnbindFromGLCtx(mDvpSysMemHandle)); - DVP_CHECK(dvpDestroyBuffer(mDvpSysMemHandle)); - - delete mExtSync; - delete mGpuSync; - - VirtualUnlock(mBuffer, mMemSize); - } - else - { - // The buffer is un-pinned by the GPU when the buffer is deleted - glDeleteBuffers(1, &mBufferHandle); - } -} - -bool VideoFrameTransfer::performFrameTransfer() -{ - if (mUseDvp) - { - // NVIDIA DVP transfers - DVPStatus status; - - mGpuSync->mReleaseValue++; - - dvpBegin(); - if (mDirection == CPUtoGPU) - { - // Copy from system memory to GPU texture - dvpMapBufferWaitDVP(mDvpCaptureTextureHandle); - status = dvpMemcpyLined( mDvpSysMemHandle, mExtSync->mDvpSync, mExtSync->mAcquireValue, DVP_TIMEOUT_IGNORED, - mDvpCaptureTextureHandle, mGpuSync->mDvpSync, mGpuSync->mReleaseValue, 0, mHeight); - dvpMapBufferEndDVP(mDvpCaptureTextureHandle); - } - else - { - // Copy from GPU texture to system memory - dvpMapBufferWaitDVP(mDvpPlaybackTextureHandle); - status = dvpMemcpyLined( mDvpPlaybackTextureHandle, mExtSync->mDvpSync, mExtSync->mReleaseValue, DVP_TIMEOUT_IGNORED, - mDvpSysMemHandle, mGpuSync->mDvpSync, mGpuSync->mReleaseValue, 0, mHeight); - dvpMapBufferEndDVP(mDvpPlaybackTextureHandle); - } - dvpEnd(); - - return (status == DVP_STATUS_OK); - } - else - { - // AMD pinned memory transfers - if (mDirection == CPUtoGPU) - { - glEnable(GL_TEXTURE_2D); - - // Use a pinned buffer for the GL_PIXEL_UNPACK_BUFFER target - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, mBufferHandle); - glBindTexture(GL_TEXTURE_2D, mCaptureTexture); - 
- // NULL for last arg indicates use current GL_PIXEL_UNPACK_BUFFER target as texture data - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, mWidth/2, mHeight, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); - - // Ensure pinned texture has been transferred to GPU before we draw with it - GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, 40 * 1000 * 1000); // timeout in nanosec - glDeleteSync(fence); - - glBindTexture(GL_TEXTURE_2D, 0); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glDisable(GL_TEXTURE_2D); - } - else - { - // Use a PIXEL PACK BUFFER to read back pixels - glBindBuffer(GL_PIXEL_PACK_BUFFER, mBufferHandle); - glReadPixels(0, 0, mWidth, mHeight, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); - - // Ensure GPU has processed all commands in the pipeline up to this point, before memory is read by the CPU - GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, 40 * 1000 * 1000); // timeout in nanosec - glDeleteSync(fence); - } - - return (glGetError() == GL_NO_ERROR); - } -} - -void VideoFrameTransfer::waitForTransferComplete() -{ - if (!mUseDvp) - return; - - // Block until buffer has completely transferred between GPU and CPU buffer - dvpBegin(); - dvpSyncObjClientWaitComplete(mGpuSync->mDvpSync, DVP_TIMEOUT_IGNORED); - dvpEnd(); -} - -void VideoFrameTransfer::beginTextureInUse(Direction direction) -{ - if (!mUseDvp) - return; - - if (direction == CPUtoGPU) - dvpMapBufferWaitAPI(mDvpCaptureTextureHandle); - else - dvpMapBufferWaitAPI(mDvpPlaybackTextureHandle); -} - -void VideoFrameTransfer::endTextureInUse(Direction direction) -{ - if (!mUseDvp) - return; - - if (direction == CPUtoGPU) - dvpMapBufferEndAPI(mDvpCaptureTextureHandle); - else - dvpMapBufferEndAPI(mDvpPlaybackTextureHandle); -} diff --git a/apps/LoopThroughWithOpenGLCompositing/gl/VideoFrameTransfer.h b/apps/LoopThroughWithOpenGLCompositing/gl/VideoFrameTransfer.h deleted file 
mode 100644 index 41f6247..0000000 --- a/apps/LoopThroughWithOpenGLCompositing/gl/VideoFrameTransfer.h +++ /dev/null @@ -1,109 +0,0 @@ -/* -LICENSE-START- - ** Copyright (c) 2012 Blackmagic Design - ** - ** Permission is hereby granted, free of charge, to any person or organization - ** obtaining a copy of the software and accompanying documentation (the - ** "Software") to use, reproduce, display, distribute, sub-license, execute, - ** and transmit the Software, and to prepare derivative works of the Software, - ** and to permit third-parties to whom the Software is furnished to do so, in - ** accordance with: - ** - ** (1) if the Software is obtained from Blackmagic Design, the End User License - ** Agreement for the Software Development Kit ("EULA") available at - ** https://www.blackmagicdesign.com/EULA/DeckLinkSDK; or - ** - ** (2) if the Software is obtained from any third party, such licensing terms - ** as notified by that third party, - ** - ** and all subject to the following: - ** - ** (3) the copyright notices in the Software and this entire statement, - ** including the above license grant, this restriction and the following - ** disclaimer, must be included in all copies of the Software, in whole or in - ** part, and all derivative works of the Software, unless such copies or - ** derivative works are solely in the form of machine-executable object code - ** generated by a source language processor. - ** - ** (4) THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - ** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - ** FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT - ** SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE - ** FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, - ** ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - ** DEALINGS IN THE SOFTWARE. 
- ** - ** A copy of the Software is available free of charge at - ** https://www.blackmagicdesign.com/desktopvideo_sdk under the EULA. - ** - ** -LICENSE-END- - */ -#ifndef __VIDEO_FRAME_TRANSFER_H__ -#define __VIDEO_FRAME_TRANSFER_H__ - -#include "GLExtensions.h" -#include -#include - -// NVIDIA GPU Direct For Video with OpenGL requires the following two headers. -// See the NVIDIA website to check if your graphics card is supported. -#include -#include - -struct SyncInfo; - - -// Class for performing efficient frame memory transfers between the CPU and GPU, -// using NVIDIA and AMD extensions. -class VideoFrameTransfer -{ -public: - enum Direction - { - CPUtoGPU, - GPUtoCPU - }; - - VideoFrameTransfer(unsigned long memSize, void* address, Direction direction); - ~VideoFrameTransfer(); - - static bool checkFastMemoryTransferAvailable(); - static bool initialize(unsigned width, unsigned height, GLuint captureTexture, GLuint playbackTexture); - static void beginTextureInUse(Direction direction); - static void endTextureInUse(Direction direction); - - bool performFrameTransfer(); - void waitForTransferComplete(); - -private: - static bool isNvidiaDvpAvailable(); - static bool isAMDPinnedMemoryAvailable(); - static bool initializeMemoryLocking(unsigned memSize); - - void* mBuffer; - unsigned long mMemSize; - Direction mDirection; - static bool mInitialized; - static bool mUseDvp; - static unsigned mWidth; - static unsigned mHeight; - static GLuint mCaptureTexture; - - // NVIDIA GPU Direct for Video support - SyncInfo* mExtSync; - SyncInfo* mGpuSync; - DVPBufferHandle mDvpSysMemHandle; - - static DVPBufferHandle mDvpCaptureTextureHandle; - static DVPBufferHandle mDvpPlaybackTextureHandle; - static uint32_t mBufferAddrAlignment; - static uint32_t mBufferGpuStrideAlignment; - static uint32_t mSemaphoreAddrAlignment; - static uint32_t mSemaphoreAllocSize; - static uint32_t mSemaphorePayloadOffset; - static uint32_t mSemaphorePayloadSize; - - // GPU buffer bound to the 
target GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD for pinned memory - GLuint mBufferHandle; -}; - -#endif \ No newline at end of file diff --git a/tests/VideoIOFormatTests.cpp b/tests/VideoIOFormatTests.cpp new file mode 100644 index 0000000..3216bf0 --- /dev/null +++ b/tests/VideoIOFormatTests.cpp @@ -0,0 +1,79 @@ +#include "VideoIOFormat.h" + +#include <cmath> +#include <iostream> + +namespace +{ +int gFailures = 0; + +void Expect(bool condition, const char* message) +{ + if (condition) + return; + + std::cerr << "FAIL: " << message << "\n"; + ++gFailures; +} + +void TestPreferredFormatSelection() +{ + Expect(ChoosePreferredVideoIOFormat(true) == VideoIOPixelFormat::V210, "10-bit is preferred when supported"); + Expect(ChoosePreferredVideoIOFormat(false) == VideoIOPixelFormat::Uyvy8, "8-bit is used as fallback"); + Expect(DeckLinkPixelFormatForVideoIO(VideoIOPixelFormat::V210) == bmdFormat10BitYUV, "v210 maps to DeckLink 10-bit YUV"); + Expect(DeckLinkPixelFormatForVideoIO(VideoIOPixelFormat::Uyvy8) == bmdFormat8BitYUV, "UYVY maps to DeckLink 8-bit YUV"); +} + +void TestRowByteHelpers() +{ + Expect(MinimumV210RowBytes(1920) == 5120, "1920-wide v210 active row bytes"); + Expect(MinimumV210RowBytes(1280) == 3424, "1280-wide v210 active row bytes rounds up to six-pixel group"); + Expect(MinimumV210RowBytes(3840) == 10240, "3840-wide v210 active row bytes"); + Expect(PackedTextureWidthFromRowBytes(5120) == 1280, "packed texture width is row bytes divided into RGBA byte texels"); + Expect(ActiveV210WordsForWidth(1920) == 1280, "active v210 words match 1920 width"); +} + +void TestV210PackUnpack() +{ + V210SixPixelBlock input; + input.y = { 64, 128, 256, 512, 768, 940 }; + input.cb = { 64, 512, 960 }; + input.cr = { 960, 512, 64 }; + + const V210SixPixelBlock output = UnpackV210Block(PackV210Block(input)); + Expect(output.y == input.y, "v210 luma survives pack/unpack"); + Expect(output.cb == input.cb, "v210 Cb survives pack/unpack"); + Expect(output.cr == input.cr, "v210 Cr survives 
pack/unpack"); +} + +void TestRec709LegalRanges() +{ + const V210CodeValues black = Rec709RgbToLegalV210(0.0f, 0.0f, 0.0f); + const V210CodeValues grey = Rec709RgbToLegalV210(0.5f, 0.5f, 0.5f); + const V210CodeValues white = Rec709RgbToLegalV210(1.0f, 1.0f, 1.0f); + + Expect(black.y == 64, "black maps to legal-range 10-bit luma minimum"); + Expect(white.y == 940, "white maps to legal-range 10-bit luma maximum"); + Expect(std::abs(static_cast<int>(grey.y) - 502) <= 1, "middle grey maps near legal-range midpoint"); + Expect(black.cb == 512 && black.cr == 512, "black keeps neutral chroma"); + Expect(grey.cb == 512 && grey.cr == 512, "grey keeps neutral chroma"); + Expect(white.cb == 512 && white.cr == 512, "white keeps neutral chroma"); +} +} + +int main() +{ + TestPreferredFormatSelection(); + TestRowByteHelpers(); + TestV210PackUnpack(); + TestRec709LegalRanges(); + + if (gFailures != 0) + { + std::cerr << gFailures << " VideoIOFormat test failure(s).\n"; + return 1; + } + + std::cout << "VideoIOFormat tests passed.\n"; + return 0; +}