From 9a8748687ab81a0439e7d41b33bec82bc172fc89 Mon Sep 17 00:00:00 2001 From: Aiden Date: Tue, 5 May 2026 12:18:42 +1000 Subject: [PATCH] Audio experiments --- .../OpenGLComposite.cpp | 329 ++++++++++---- .../OpenGLComposite.h | 34 +- .../RuntimeHost.cpp | 15 + .../RuntimeHost.h | 10 + config/runtime-host.json | 5 + docs/AUDIO_TEARING_INVESTIGATION.md | 406 ++++++++++++++++++ 6 files changed, 719 insertions(+), 80 deletions(-) create mode 100644 docs/AUDIO_TEARING_INVESTIGATION.md diff --git a/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.cpp b/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.cpp index 43c3fb0..b82a975 100644 --- a/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.cpp @@ -44,6 +44,7 @@ #include "OscServer.h" #include +#include #include #include #include @@ -51,6 +52,7 @@ #include #include #include +#include #include #include @@ -66,6 +68,8 @@ constexpr GLuint kPackedVideoTextureUnit = 2; constexpr GLuint kGlobalParamsBindingPoint = 0; constexpr unsigned kVideoPrerollFrameCount = 5; constexpr unsigned kAudioOutputWaterLevelSampleFrames = kAudioSampleRate / 2; +constexpr unsigned kAudioScheduleChunkSampleFrames = kAudioSampleRate / 100; +constexpr unsigned kDeckLinkOutputAudioChannelCount = 16; #ifndef GL_RGBA32F #define GL_RGBA32F 0x8814 @@ -325,7 +329,7 @@ OpenGLComposite::OpenGLComposite(HWND hWnd, HDC hDC, HGLRC hRC) : mDLInput(NULL), mDLOutput(NULL), mDLInputConfiguration(NULL), mDLKeyer(NULL), mPlayoutAllocator(NULL), mTotalPlayoutFrames(0), - mNextAudioSampleFrame(0), + mAudioOutputSampleTime(0), mInputFrameWidth(0), mInputFrameHeight(0), mOutputFrameWidth(0), mOutputFrameHeight(0), mInputDisplayModeName("1080p59.94"), @@ -358,7 +362,14 @@ OpenGLComposite::OpenGLComposite(HWND hWnd, HDC hDC, HGLRC hRC) : mViewWidth(0), mViewHeight(0), mAudioEnabled(false), + mAudioOutputEnabled(false), + mAudioScheduleEnabled(false), + mAudioPrerollEnabled(false), + 
mAudioScheduleSilence(false), + mAudioScheduleTone(false), mAudioPrerolling(false), + mAudioSchedulerRunning(false), + mPlayoutCallbackActive(false), mTemporalHistoryNeedsReset(true) { InitializeCriticalSection(&pMutex); @@ -369,6 +380,11 @@ OpenGLComposite::OpenGLComposite(HWND hWnd, HDC hDC, HGLRC hRC) : OpenGLComposite::~OpenGLComposite() { + mAudioSchedulerRunning.store(false); + mAudioPacketQueued.notify_all(); + if (mAudioSchedulerThread.joinable()) + mAudioSchedulerThread.join(); + // Cleanup for Capture if (mDLInput != NULL) { @@ -638,6 +654,11 @@ bool OpenGLComposite::InitDeckLink() if (! CheckOpenGLExtensions()) goto error; + if (mAudioOutputEnabled) + { + mFastTransferExtensionAvailable = false; + OutputDebugStringA("Audio output enabled; using DeckLink-owned output video frames for SDI stability.\n"); + } if (mInputFrameWidth != mOutputFrameWidth || mInputFrameHeight != mOutputFrameHeight) { mFastTransferExtensionAvailable = false; @@ -689,6 +710,11 @@ bool OpenGLComposite::InitDeckLink() } mAudioEnabled = mRuntimeHost ? mRuntimeHost->AudioEnabled() : true; + mAudioOutputEnabled = mAudioEnabled && (mRuntimeHost ? mRuntimeHost->AudioOutputEnabled() : true); + mAudioScheduleEnabled = mAudioOutputEnabled && (mRuntimeHost ? mRuntimeHost->AudioScheduleEnabled() : true); + mAudioPrerollEnabled = mAudioScheduleEnabled && (mRuntimeHost ? mRuntimeHost->AudioPrerollEnabled() : true); + mAudioScheduleSilence = mAudioScheduleEnabled && (mRuntimeHost ? mRuntimeHost->AudioScheduleSilence() : false); + mAudioScheduleTone = mAudioScheduleEnabled && (mRuntimeHost ? 
mRuntimeHost->AudioScheduleTone() : false); if (mAudioEnabled) { if (mDLInput->QueryInterface(IID_IDeckLinkConfiguration, (void**)&mDLInputConfiguration) == S_OK && mDLInputConfiguration != NULL) @@ -721,11 +747,10 @@ bool OpenGLComposite::InitDeckLink() if (mDLOutput->EnableVideoOutput(outputDisplayMode, bmdVideoOutputFlagDefault) != S_OK) goto error; - if (mAudioEnabled && mDLOutput->EnableAudioOutput(bmdAudioSampleRate48kHz, bmdAudioSampleType32bitInteger, kAudioChannelCount, bmdAudioOutputStreamTimestamped) != S_OK) + if (mAudioOutputEnabled && mDLOutput->EnableAudioOutput(bmdAudioSampleRate48kHz, bmdAudioSampleType32bitInteger, kDeckLinkOutputAudioChannelCount, bmdAudioOutputStreamContinuous) != S_OK) { OutputDebugStringA("Could not enable DeckLink audio output; continuing without audio.\n"); - mDLInput->DisableAudioInput(); - mAudioEnabled = false; + mAudioOutputEnabled = false; } if (mDLOutput->QueryInterface(IID_IDeckLinkKeyer, (void**)&mDLKeyer) == S_OK && mDLKeyer != NULL) @@ -778,12 +803,23 @@ bool OpenGLComposite::InitDeckLink() // If you want RGB 4:4:4 format to be played out "over the wire" in SDI, turn on the "Use 4:4:4 SDI" in the control // panel or turn on the bmdDeckLinkConfig444SDIVideoOutput flag using the IDeckLinkConfiguration interface. 
IDeckLinkMutableVideoFrame* outputFrame; - IDeckLinkVideoBuffer* outputFrameBuffer = NULL; + if (mAudioOutputEnabled) + { + if (mDLOutput->CreateVideoFrame(mOutputFrameWidth, mOutputFrameHeight, outputFrameRowBytes, bmdFormat8BitBGRA, bmdFrameFlagFlipVertical, &outputFrame) != S_OK) + goto error; + } + else + { + IDeckLinkVideoBuffer* outputFrameBuffer = NULL; - if (mPlayoutAllocator->AllocateVideoBuffer(&outputFrameBuffer) != S_OK) - goto error; + if (mPlayoutAllocator->AllocateVideoBuffer(&outputFrameBuffer) != S_OK) + goto error; - if (mDLOutput->CreateVideoFrameWithBuffer(mOutputFrameWidth, mOutputFrameHeight, outputFrameRowBytes, bmdFormat8BitBGRA, bmdFrameFlagFlipVertical, outputFrameBuffer, &outputFrame) != S_OK) + if (mDLOutput->CreateVideoFrameWithBuffer(mOutputFrameWidth, mOutputFrameHeight, outputFrameRowBytes, bmdFormat8BitBGRA, bmdFrameFlagFlipVertical, outputFrameBuffer, &outputFrame) != S_OK) + goto error; + } + + if (outputFrame == NULL) goto error; mDLOutputVideoFrameQueue.push_back(outputFrame); @@ -796,12 +832,11 @@ bool OpenGLComposite::InitDeckLink() if (mDLOutput->SetScheduledFrameCompletionCallback(mPlayoutDelegate) != S_OK) goto error; - if (mAudioEnabled && mDLOutput->SetAudioCallback(mPlayoutDelegate) != S_OK) + if (mAudioOutputEnabled && mDLOutput->SetAudioCallback(mPlayoutDelegate) != S_OK) { OutputDebugStringA("Could not set DeckLink audio output callback; continuing without audio.\n"); - mDLInput->DisableAudioInput(); mDLOutput->DisableAudioOutput(); - mAudioEnabled = false; + mAudioOutputEnabled = false; } bSuccess = true; @@ -1217,16 +1252,48 @@ void OpenGLComposite::AudioPacketArrived(IDeckLinkAudioInputPacket* audioPacket) if (sampleFrameCount <= 0) return; - mAudioDelayBuffer.PushInterleaved(static_cast(audioBytes), static_cast(sampleFrameCount)); - updateAudioStatus(); + TimestampedAudioPacket packet; + packet.block.interleavedSamples.assign( + static_cast(audioBytes), + static_cast(audioBytes) + (static_cast(sampleFrameCount) * 
kAudioChannelCount)); + + if (!mAudioScheduleEnabled) + { + AudioAnalysisSnapshot audioAnalysis; + { + std::lock_guard analyzerLock(mAudioAnalyzerMutex); + audioAnalysis = mAudioAnalyzer.Analyze(packet.block); + } + { + std::lock_guard audioLock(mAudioStateMutex); + mAudioAnalysis = audioAnalysis; + } + updateAudioStatus(); + return; + } + + { + std::lock_guard audioLock(mAudioStateMutex); + for (int32_t sample : packet.block.interleavedSamples) + mAudioSampleQueue.push_back(sample); + mQueuedAudioSampleFrames += static_cast(sampleFrameCount); + } + mAudioPacketQueued.notify_one(); + ScheduleAudioToWaterLevel(); } HRESULT OpenGLComposite::RenderAudioSamples(BOOL preroll) { - if (!mAudioEnabled || !mDLOutput) + return ScheduleAudioToWaterLevel(); +} + +HRESULT OpenGLComposite::ScheduleAudioToWaterLevel() +{ + if (!mAudioScheduleEnabled || !mDLOutput) return S_OK; - std::lock_guard audioLock(mAudioStateMutex); + if (mPlayoutCallbackActive.load(std::memory_order_acquire)) + return S_FALSE; unsigned bufferedSampleFrames = 0; if (mDLOutput->GetBufferedAudioSampleFrameCount(&bufferedSampleFrames) != S_OK) @@ -1235,24 +1302,88 @@ HRESULT OpenGLComposite::RenderAudioSamples(BOOL preroll) return E_FAIL; } - const unsigned delayedSampleFrames = delayedAudioSampleFrames(); - const unsigned waterLevel = kAudioOutputWaterLevelSampleFrames > delayedSampleFrames - ? 
kAudioOutputWaterLevelSampleFrames - : delayedSampleFrames; - if (bufferedSampleFrames >= waterLevel) + const unsigned audioWaterLevel = static_cast(AudioSampleTimeForVideoFrame(kVideoPrerollFrameCount, mFrameDuration, mFrameTimescale)); + if (bufferedSampleFrames >= audioWaterLevel) return S_OK; - const unsigned requestedSampleFrames = waterLevel - bufferedSampleFrames; - bool underrun = false; - AudioFrameBlock audioBlock = mAudioDelayBuffer.Pop(requestedSampleFrames, underrun); - mAudioAnalysis = mAudioAnalyzer.Analyze(audioBlock); + TimestampedAudioPacket packet; + bool poppedCapturedAudio = false; + { + std::unique_lock audioLock(mAudioStateMutex, std::try_to_lock); + if (!audioLock.owns_lock()) + return S_FALSE; + + const unsigned audioDeficitFrames = audioWaterLevel - bufferedSampleFrames; + const unsigned requestedFrames = audioDeficitFrames < kAudioScheduleChunkSampleFrames ? audioDeficitFrames : kAudioScheduleChunkSampleFrames; + if (requestedFrames == 0) + return S_OK; + + if (mAudioScheduleTone) + { + const std::size_t requestedSamples = static_cast(requestedFrames) * kAudioChannelCount; + packet.block.interleavedSamples.reserve(requestedSamples); + for (unsigned frame = 0; frame < requestedFrames; ++frame) + { + const double phase = (static_cast(mAudioToneSampleIndex++) * 440.0 * 6.28318530717958647692) / static_cast(kAudioSampleRate); + const int32_t sample = static_cast(std::sin(phase) * 0.125 * 2147483647.0); + for (unsigned channel = 0; channel < kAudioChannelCount; ++channel) + packet.block.interleavedSamples.push_back(sample); + } + } + else if (mAudioScheduleSilence) + { + packet.block.interleavedSamples.assign(static_cast(requestedFrames) * kAudioChannelCount, 0); + } + else + { + const std::size_t requestedSamples = static_cast(requestedFrames) * kAudioChannelCount; + packet.block.interleavedSamples.reserve(requestedSamples); + while (!mAudioSampleQueue.empty() && packet.block.interleavedSamples.size() < requestedSamples) + { + 
packet.block.interleavedSamples.push_back(mAudioSampleQueue.front()); + mAudioSampleQueue.pop_front(); + } + if (packet.block.interleavedSamples.size() < requestedSamples) + { + mAudioUnderrunCount++; + packet.block.interleavedSamples.resize(requestedSamples, 0); + } + const auto frameCount = static_cast(packet.block.frameCount()); + mQueuedAudioSampleFrames = frameCount <= mQueuedAudioSampleFrames ? mQueuedAudioSampleFrames - frameCount : 0; + poppedCapturedAudio = true; + } + } + + const unsigned sampleFrames = static_cast(packet.block.frameCount()); + if (sampleFrames == 0) + return S_FALSE; + + std::vector deckLinkAudioSamples(static_cast(sampleFrames) * kDeckLinkOutputAudioChannelCount, 0); + for (unsigned frame = 0; frame < sampleFrames; ++frame) + { + const std::size_t source = static_cast(frame) * kAudioChannelCount; + const std::size_t destination = static_cast(frame) * kDeckLinkOutputAudioChannelCount; + deckLinkAudioSamples[destination] = packet.block.interleavedSamples[source]; + deckLinkAudioSamples[destination + 1] = packet.block.interleavedSamples[source + 1]; + } + + if (mPlayoutCallbackActive.load(std::memory_order_acquire)) + { + std::lock_guard audioLock(mAudioStateMutex); + if (poppedCapturedAudio) + { + for (auto it = packet.block.interleavedSamples.rbegin(); it != packet.block.interleavedSamples.rend(); ++it) + mAudioSampleQueue.push_front(*it); + mQueuedAudioSampleFrames += sampleFrames; + } + return S_FALSE; + } unsigned sampleFramesWritten = 0; - const unsigned sampleFrames = static_cast(audioBlock.frameCount()); - const HRESULT scheduleResult = mDLOutput->ScheduleAudioSamples( - audioBlock.interleavedSamples.data(), + HRESULT scheduleResult = mDLOutput->ScheduleAudioSamples( + deckLinkAudioSamples.data(), sampleFrames, - static_cast(mNextAudioSampleFrame), + static_cast(mAudioOutputSampleTime), kAudioSampleRate, &sampleFramesWritten); @@ -1260,41 +1391,69 @@ HRESULT OpenGLComposite::RenderAudioSamples(BOOL preroll) { if 
(sampleFramesWritten == 0 && sampleFrames > 0) OutputDebugStringA("DeckLink accepted audio schedule call but wrote 0 sample frames.\n"); - mNextAudioSampleFrame += sampleFramesWritten; + mAudioOutputSampleTime += sampleFramesWritten; + + AudioFrameBlock analysisBlock = packet.block; + AudioAnalysisSnapshot audioAnalysis; + { + std::lock_guard analyzerLock(mAudioAnalyzerMutex); + audioAnalysis = mAudioAnalyzer.Analyze(analysisBlock); + } + { + std::lock_guard audioLock(mAudioStateMutex); + mAudioAnalysis = audioAnalysis; + packet.scheduledOutputSamples = std::move(deckLinkAudioSamples); + mScheduledAudioPacketRetainQueue.push_back(std::move(packet)); + while (mScheduledAudioPacketRetainQueue.size() > 64) + mScheduledAudioPacketRetainQueue.pop_front(); + } + updateAudioStatus(); } else { OutputDebugStringA("DeckLink ScheduleAudioSamples failed while topping up audio output.\n"); } - updateAudioStatus(); return scheduleResult; } +void OpenGLComposite::AudioSchedulingLoop() +{ + while (mAudioSchedulerRunning.load()) + { + ScheduleAudioToWaterLevel(); + + std::unique_lock audioLock(mAudioStateMutex); + mAudioPacketQueued.wait_for(audioLock, std::chrono::milliseconds(20), [this]() + { + return !mAudioSchedulerRunning.load() || !mAudioPacketQueue.empty(); + }); + } +} + // Render the live video texture through the runtime shader into the off-screen framebuffer. // Read the result back from the frame buffer and schedule it for playout. 
void OpenGLComposite::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, BMDOutputFrameCompletionResult completionResult) { + mPlayoutCallbackActive.store(true, std::memory_order_release); EnterCriticalSection(&pMutex); + auto leavePlayoutCallback = [this]() + { + mPlayoutCallbackActive.store(false, std::memory_order_release); + LeaveCriticalSection(&pMutex); + }; - // Get the first frame from the queue - IDeckLinkMutableVideoFrame* outputVideoFrame = mDLOutputVideoFrameQueue.front(); - mDLOutputVideoFrameQueue.push_back(outputVideoFrame); - mDLOutputVideoFrameQueue.pop_front(); + if (!completedFrame) + { + leavePlayoutCallback(); + return; + } + + IDeckLinkVideoFrame* outputVideoFrame = completedFrame; // make GL context current in this thread wglMakeCurrent( hGLDC, hGLRC ); - if (mAudioEnabled) - { - AudioAnalysisSnapshot audioAnalysis; - { - std::lock_guard audioLock(mAudioStateMutex); - audioAnalysis = mAudioAnalysis; - } - updateAudioDataTexture(audioAnalysis); - } - // Draw the effect output to the off-screen framebuffer. 
const auto renderStartTime = std::chrono::steady_clock::now(); if (mFastTransferExtensionAvailable) @@ -1323,14 +1482,14 @@ void OpenGLComposite::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, IDeckLinkVideoBuffer* outputVideoFrameBuffer; if (outputVideoFrame->QueryInterface(IID_IDeckLinkVideoBuffer, (void**)&outputVideoFrameBuffer) != S_OK) { - LeaveCriticalSection(&pMutex); + leavePlayoutCallback(); return; } if (outputVideoFrameBuffer->StartAccess(bmdBufferAccessWrite) != S_OK) { outputVideoFrameBuffer->Release(); - LeaveCriticalSection(&pMutex); + leavePlayoutCallback(); return; } @@ -1371,22 +1530,21 @@ void OpenGLComposite::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, wglMakeCurrent( NULL, NULL ); - LeaveCriticalSection(&pMutex); + leavePlayoutCallback(); } bool OpenGLComposite::Start() { mTotalPlayoutFrames = 0; initializeAudioDelay(); - if (mAudioEnabled) + if (mAudioPrerollEnabled) { mDLOutput->FlushBufferedAudioSamples(); if (mDLOutput->BeginAudioPreroll() != S_OK) { OutputDebugStringA("Could not begin DeckLink audio preroll; continuing without audio.\n"); - mDLInput->DisableAudioInput(); mDLOutput->DisableAudioOutput(); - mAudioEnabled = false; + mAudioOutputEnabled = false; } else { @@ -1426,26 +1584,37 @@ bool OpenGLComposite::Start() mTotalPlayoutFrames++; } - if (mAudioEnabled) - RenderAudioSamples(TRUE); + if (mDLInput->StartStreams() != S_OK) + { + return false; + } if (mAudioPrerolling) { + const unsigned audioWaterLevel = static_cast(AudioSampleTimeForVideoFrame(kVideoPrerollFrameCount, mFrameDuration, mFrameTimescale)); + const auto prerollDeadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(750); + while (mAudioScheduleEnabled && std::chrono::steady_clock::now() < prerollDeadline) + { + unsigned bufferedSampleFrames = 0; + if (mDLOutput->GetBufferedAudioSampleFrameCount(&bufferedSampleFrames) == S_OK && bufferedSampleFrames >= audioWaterLevel) + break; + 
std::this_thread::sleep_for(std::chrono::milliseconds(5)); + } + if (mDLOutput->EndAudioPreroll() != S_OK) { OutputDebugStringA("Could not end DeckLink audio preroll; continuing without audio.\n"); - mDLInput->DisableAudioInput(); mDLOutput->DisableAudioOutput(); - mAudioEnabled = false; + mAudioOutputEnabled = false; + mAudioScheduleEnabled = false; } mAudioPrerolling = false; } - if (mDLInput->StartStreams() != S_OK) - return false; - if (mDLOutput->StartScheduledPlayback(0, mFrameTimescale, 1.0) != S_OK) + { return false; + } return true; } @@ -1475,6 +1644,11 @@ bool OpenGLComposite::Stop() } } + mAudioSchedulerRunning.store(false); + mAudioPacketQueued.notify_all(); + if (mAudioSchedulerThread.joinable()) + mAudioSchedulerThread.join(); + mDLInput->StopStreams(); mDLInput->DisableVideoInput(); if (mAudioEnabled) @@ -1485,7 +1659,7 @@ bool OpenGLComposite::Stop() mDLOutput->SetScheduledFrameCompletionCallback(NULL); mDLOutput->DisableVideoOutput(); mAudioPrerolling = false; - if (mAudioEnabled) + if (mAudioOutputEnabled) mDLOutput->DisableAudioOutput(); return true; @@ -2260,30 +2434,33 @@ void OpenGLComposite::broadcastRuntimeState() mControlServer->BroadcastState(); } -unsigned OpenGLComposite::delayedAudioSampleFrames() const +BMDTimeValue OpenGLComposite::delayedAudioStreamTime() const { - return static_cast(AudioSampleTimeForVideoFrame(kVideoPrerollFrameCount, mFrameDuration, mFrameTimescale)); + return static_cast(kVideoPrerollFrameCount) * mFrameDuration; } void OpenGLComposite::initializeAudioDelay() { - std::lock_guard audioLock(mAudioStateMutex); - mAudioAnalyzer.Reset(); - mAudioAnalysis = AudioAnalysisSnapshot(); - mAudioDelayBuffer.Reset(delayedAudioSampleFrames()); - mNextAudioSampleFrame = 0; + { + std::lock_guard analyzerLock(mAudioAnalyzerMutex); + mAudioAnalyzer.Reset(); + } + { + std::lock_guard audioLock(mAudioStateMutex); + mAudioAnalysis = AudioAnalysisSnapshot(); + mAudioPacketQueue.clear(); + mScheduledAudioPacketRetainQueue.clear(); + 
mAudioSampleQueue.clear(); + mQueuedAudioSampleFrames = 0; + mAudioUnderrunCount = 0; + mAudioOutputSampleTime = 0; + mAudioToneSampleIndex = 0; + mHasFirstAudioPacketTime = false; + mFirstAudioPacketTime = 0; + } updateAudioStatus(); } -AudioFrameBlock OpenGLComposite::popAudioForVideoFrame(uint64_t videoFrameIndex) -{ - const unsigned sampleFrames = AudioSamplesForVideoFrame(videoFrameIndex, mFrameDuration, mFrameTimescale); - bool underrun = false; - AudioFrameBlock block = mAudioDelayBuffer.Pop(sampleFrames, underrun); - mAudioAnalysis = mAudioAnalyzer.Analyze(block); - return block; -} - void OpenGLComposite::updateAudioDataTexture(const AudioAnalysisSnapshot& analysis) { if (mAudioDataTexture == 0) @@ -2303,8 +2480,8 @@ void OpenGLComposite::updateAudioStatus() AudioStatusSnapshot status; status.enabled = mAudioEnabled; - status.bufferedSampleFrames = mAudioDelayBuffer.BufferedSampleFrames(); - status.underrunCount = mAudioDelayBuffer.UnderrunCount(); + status.bufferedSampleFrames = mQueuedAudioSampleFrames; + status.underrunCount = mAudioUnderrunCount; status.analysis = mAudioAnalysis; mRuntimeHost->SetAudioStatus(status); } diff --git a/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.h b/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.h index 335bee2..8e05f0d 100644 --- a/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.h +++ b/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.h @@ -57,11 +57,13 @@ #include "RuntimeHost.h" #include +#include #include #include #include #include #include +#include #include #include @@ -101,6 +103,8 @@ public: void VideoFrameArrived(IDeckLinkVideoInputFrame* inputFrame, bool hasNoInputSource); void AudioPacketArrived(IDeckLinkAudioInputPacket* audioPacket); HRESULT RenderAudioSamples(BOOL preroll); + HRESULT ScheduleAudioToWaterLevel(); + void AudioSchedulingLoop(); void PlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, BMDOutputFrameCompletionResult result); private: @@ -124,7 +128,7 @@ private: 
BMDTimeValue mFrameDuration; BMDTimeScale mFrameTimescale; unsigned mTotalPlayoutFrames; - uint64_t mNextAudioSampleFrame; + uint64_t mAudioOutputSampleTime; unsigned mInputFrameWidth; unsigned mInputFrameHeight; unsigned mOutputFrameWidth; @@ -166,11 +170,34 @@ private: std::unique_ptr mControlServer; std::unique_ptr mOscServer; bool mAudioEnabled; + bool mAudioOutputEnabled; + bool mAudioScheduleEnabled; + bool mAudioPrerollEnabled; + bool mAudioScheduleSilence; + bool mAudioScheduleTone; bool mAudioPrerolling; + std::atomic mAudioSchedulerRunning; + std::atomic mPlayoutCallbackActive; + std::thread mAudioSchedulerThread; std::mutex mAudioStateMutex; - AudioDelayBuffer mAudioDelayBuffer; + std::mutex mAudioAnalyzerMutex; AudioAnalyzer mAudioAnalyzer; AudioAnalysisSnapshot mAudioAnalysis; + struct TimestampedAudioPacket + { + AudioFrameBlock block; + std::vector scheduledOutputSamples; + BMDTimeValue streamTime = 0; + }; + std::deque mAudioPacketQueue; + std::deque mScheduledAudioPacketRetainQueue; + std::deque mAudioSampleQueue; + std::condition_variable mAudioPacketQueued; + unsigned mQueuedAudioSampleFrames = 0; + uint64_t mAudioUnderrunCount = 0; + uint64_t mAudioToneSampleIndex = 0; + bool mHasFirstAudioPacketTime = false; + BMDTimeValue mFirstAudioPacketTime = 0; struct LayerProgram { @@ -224,8 +251,7 @@ private: bool PollRuntimeChanges(); void broadcastRuntimeState(); void initializeAudioDelay(); - unsigned delayedAudioSampleFrames() const; - AudioFrameBlock popAudioForVideoFrame(uint64_t videoFrameIndex); + BMDTimeValue delayedAudioStreamTime() const; void updateAudioDataTexture(const AudioAnalysisSnapshot& analysis); void updateAudioStatus(); bool updateGlobalParamsBuffer(const RuntimeRenderState& state, unsigned availableSourceHistoryLength, unsigned availableTemporalHistoryLength); diff --git a/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.cpp b/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.cpp index d8db93b..8bd9269 100644 --- 
a/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.cpp @@ -1191,6 +1191,16 @@ bool RuntimeHost::LoadConfig(std::string& error) mConfig.enableExternalKeying = enableExternalKeyingValue->asBoolean(mConfig.enableExternalKeying); if (const JsonValue* audioEnabledValue = configJson.find("audioEnabled")) mConfig.audioEnabled = audioEnabledValue->asBoolean(mConfig.audioEnabled); + if (const JsonValue* audioOutputEnabledValue = configJson.find("audioOutputEnabled")) + mConfig.audioOutputEnabled = audioOutputEnabledValue->asBoolean(mConfig.audioOutputEnabled); + if (const JsonValue* audioScheduleEnabledValue = configJson.find("audioScheduleEnabled")) + mConfig.audioScheduleEnabled = audioScheduleEnabledValue->asBoolean(mConfig.audioScheduleEnabled); + if (const JsonValue* audioPrerollEnabledValue = configJson.find("audioPrerollEnabled")) + mConfig.audioPrerollEnabled = audioPrerollEnabledValue->asBoolean(mConfig.audioPrerollEnabled); + if (const JsonValue* audioScheduleSilenceValue = configJson.find("audioScheduleSilence")) + mConfig.audioScheduleSilence = audioScheduleSilenceValue->asBoolean(mConfig.audioScheduleSilence); + if (const JsonValue* audioScheduleToneValue = configJson.find("audioScheduleTone")) + mConfig.audioScheduleTone = audioScheduleToneValue->asBoolean(mConfig.audioScheduleTone); if (const JsonValue* audioChannelCountValue = configJson.find("audioChannelCount")) mConfig.audioChannelCount = static_cast(audioChannelCountValue->asNumber(static_cast(mConfig.audioChannelCount))); if (const JsonValue* audioSampleRateValue = configJson.find("audioSampleRate")) @@ -1542,6 +1552,11 @@ JsonValue RuntimeHost::BuildStateValue() const app.set("maxTemporalHistoryFrames", JsonValue(static_cast(mConfig.maxTemporalHistoryFrames))); app.set("enableExternalKeying", JsonValue(mConfig.enableExternalKeying)); app.set("audioEnabled", JsonValue(mConfig.audioEnabled)); + app.set("audioOutputEnabled", 
JsonValue(mConfig.audioOutputEnabled)); + app.set("audioScheduleEnabled", JsonValue(mConfig.audioScheduleEnabled)); + app.set("audioPrerollEnabled", JsonValue(mConfig.audioPrerollEnabled)); + app.set("audioScheduleSilence", JsonValue(mConfig.audioScheduleSilence)); + app.set("audioScheduleTone", JsonValue(mConfig.audioScheduleTone)); app.set("audioChannelCount", JsonValue(static_cast(mConfig.audioChannelCount))); app.set("audioSampleRate", JsonValue(static_cast(mConfig.audioSampleRate))); app.set("audioDelayMode", JsonValue(mConfig.audioDelayMode)); diff --git a/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.h b/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.h index 8b53c39..85c3789 100644 --- a/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.h +++ b/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.h @@ -54,6 +54,11 @@ public: unsigned GetMaxTemporalHistoryFrames() const { return mConfig.maxTemporalHistoryFrames; } bool ExternalKeyingEnabled() const { return mConfig.enableExternalKeying; } bool AudioEnabled() const { return mConfig.audioEnabled; } + bool AudioOutputEnabled() const { return mConfig.audioOutputEnabled; } + bool AudioScheduleEnabled() const { return mConfig.audioScheduleEnabled; } + bool AudioPrerollEnabled() const { return mConfig.audioPrerollEnabled; } + bool AudioScheduleSilence() const { return mConfig.audioScheduleSilence; } + bool AudioScheduleTone() const { return mConfig.audioScheduleTone; } unsigned AudioChannelCount() const { return mConfig.audioChannelCount; } unsigned AudioSampleRate() const { return mConfig.audioSampleRate; } const std::string& GetInputVideoFormat() const { return mConfig.inputVideoFormat; } @@ -73,6 +78,11 @@ private: unsigned maxTemporalHistoryFrames = 4; bool enableExternalKeying = false; bool audioEnabled = true; + bool audioOutputEnabled = true; + bool audioScheduleEnabled = true; + bool audioPrerollEnabled = true; + bool audioScheduleSilence = false; + bool audioScheduleTone = false; unsigned 
audioChannelCount = kAudioChannelCount; unsigned audioSampleRate = kAudioSampleRate; std::string audioDelayMode = "matchVideoPreroll"; diff --git a/config/runtime-host.json b/config/runtime-host.json index f5bf579..adf90b8 100644 --- a/config/runtime-host.json +++ b/config/runtime-host.json @@ -9,6 +9,11 @@ "autoReload": true, "maxTemporalHistoryFrames": 12, "audioEnabled": true, + "audioOutputEnabled": true, + "audioScheduleEnabled": true, + "audioPrerollEnabled": true, + "audioScheduleSilence": false, + "audioScheduleTone": false, "audioChannelCount": 2, "audioSampleRate": 48000, "audioDelayMode": "matchVideoPreroll", diff --git a/docs/AUDIO_TEARING_INVESTIGATION.md b/docs/AUDIO_TEARING_INVESTIGATION.md new file mode 100644 index 0000000..37a9e74 --- /dev/null +++ b/docs/AUDIO_TEARING_INVESTIGATION.md @@ -0,0 +1,406 @@ +# Audio / SDI Tearing Investigation + +Date: 2026-05-05 + +## Problem + +After adding DeckLink audio pass-through, the SDI output intermittently shows a torn/corrupted frame. The preview window does not show the artifact. + +Observed artifact: + +- Bottom portion of the SDI image can show an offset mix of current/previous frame. +- Looks like a frame-buffer or output-transfer issue rather than shader rendering. +- Occurs even with all shaders bypassed. +- Main branch is known good with no tearing. + +Later tests also showed audio tearing/stutter when non-silent audio was scheduled. + +## Known Good Baseline + +- `main` branch has no SDI tearing. +- Current branch with `audioEnabled: false` ran for several minutes with no visible tearing. + +This strongly suggests the issue is tied to DeckLink audio output/scheduling rather than the shader stack. + +## SDK References Checked + +### `InputLoopThrough` + +Location: + +`3rdParty/Blackmagic DeckLink SDK 16.0/Win/Samples/InputLoopThrough` + +Findings: + +- This is the SDK loop-through sample that keeps audio. +- It preserves DeckLink audio packet timestamps using `GetPacketTime(..., m_frameTimescale)`. 
+- It schedules audio packets with `ScheduleAudioSamples(..., packetTime, m_frameTimescale, ...)`. +- It uses 16-channel 32-bit embedded audio by default. +- It has separate scheduling threads for video/audio. +- It waits for both video and audio preroll before `StartScheduledPlayback`. + +### `LoopThroughWithOpenGLCompositing` + +Location: + +`3rdParty/Blackmagic DeckLink SDK 16.0/Win/Samples/LoopThroughWithOpenGLCompositing` + +Findings: + +- This sample is the base for this app. +- It ignores `IDeckLinkAudioInputPacket`. +- It does not demonstrate audio pass-through. + +### `SignalGenerator` + +Location: + +`3rdParty/Blackmagic DeckLink SDK 16.0/Win/Samples/SignalGenerator` + +Findings: + +- Uses `RenderAudioSamples()` callback to top up audio when DeckLink requests samples. +- Uses `GetBufferedAudioSampleFrameCount()` and a water level before scheduling more audio. + +## Tests Tried And Results + +### 1. Initial audio pass-through with FIFO and sample-time accumulator + +Implementation: + +- Copied incoming audio into a stereo FIFO. +- Scheduled audio with a generated `mNextAudioSampleFrame` clock in 48 kHz timescale. +- Matched delay to video preroll. + +Result: + +- Audio eventually worked. +- SDI video tearing appeared. + +Conclusion: + +- Basic audio output path triggered SDI instability. + +### 2. Reworked audio toward SDK `InputLoopThrough` packet-timestamp model + +Implementation: + +- Preserved incoming packet time via `GetPacketTime(..., mFrameTimescale)`. +- Queued timestamped audio packets. +- Scheduled packets with `ScheduleAudioSamples(..., packet.streamTime, mFrameTimescale, ...)`. + +Result: + +- Tearing persisted. + +Conclusion: + +- Simply matching SDK timestamp domain did not fix the issue. + +### 3. Restored video callback closer to `main` + +Implementation: + +- Removed extra `glFinish()` calls. +- Restored preview/readback ordering closer to `main`. +- Re-enabled fast transfer path after earlier tests disabled it. 
+- Removed audio texture upload from video playout callback. +- Removed audio analysis and audio locks from video playout callback. +- Removed DeckLink scheduling mutex around `ScheduleVideoFrame`. + +Result: + +- Tearing frequency seemed reduced at one point, but tearing persisted. + +Conclusion: + +- Extra work in the playout callback may have made timing worse, but was not the root cause. + +### 4. Disabled audio completely + +Config: + +```json +"audioEnabled": false +``` + +Result: + +- Ran for several minutes with no visible tearing. + +Conclusion: + +- The tearing is tied to audio being enabled. + +### 5. Enabled audio input/analysis but disabled DeckLink audio output + +Config: + +```json +"audioEnabled": true, +"audioOutputEnabled": false +``` + +Result: + +- No tearing appeared. + +Conclusion: + +- DeckLink audio input and CPU analysis are not the trigger. +- The problem is on the DeckLink audio output side. + +### 6. Enabled DeckLink audio output but disabled scheduling + +Config: + +```json +"audioEnabled": true, +"audioOutputEnabled": true, +"audioScheduleEnabled": false +``` + +Result: + +- No video tearing. +- Slight stutter appeared. + +Conclusion: + +- `EnableAudioOutput()` alone did not produce the tearing. +- Stutter was likely from enabling an audio output stream without feeding it samples. + +### 7. Enabled audio scheduling but skipped audio preroll + +Config: + +```json +"audioEnabled": true, +"audioOutputEnabled": true, +"audioScheduleEnabled": true, +"audioPrerollEnabled": false +``` + +Result: + +- Video tearing returned. +- Stutter also present. + +Conclusion: + +- `BeginAudioPreroll()` / `EndAudioPreroll()` are not required to trigger the tear. +- `ScheduleAudioSamples()` is strongly implicated. + +### 8. Retained scheduled audio packet memory after `ScheduleAudioSamples` + +Implementation: + +- Kept scheduled packet buffers alive in a retain queue after scheduling. 
+- Avoided passing DeckLink pointers to vectors that immediately went out of scope. + +Result: + +- Video tearing and stutter persisted. + +Conclusion: + +- Buffer lifetime after `ScheduleAudioSamples()` was not the root cause. + +### 9. Added audio water-level cap + +Implementation: + +- Restored SDK-style `GetBufferedAudioSampleFrameCount()` check. +- Only scheduled more audio if DeckLink buffer was below the target water level. + +Result: + +- Stutter was reduced. +- Video tearing persisted. + +Conclusion: + +- Overscheduling contributed to stutter/timing pressure. +- It did not explain the tearing. + +### 10. Removed standalone audio scheduler thread + +Implementation: + +- Stopped starting the dedicated audio scheduler thread. +- Audio top-up occurred from input packet arrival and `RenderAudioSamples()` callback. + +Result: + +- No meaningful change. + +Conclusion: + +- The polling thread itself was not the cause. + +### 11. Switched from timestamped audio output to continuous audio output + +Implementation: + +- Changed audio output to `bmdAudioOutputStreamContinuous`. +- Scheduled audio using a monotonic 48 kHz sample clock. + +Result: + +- Video tearing and stutter persisted. + +Conclusion: + +- The issue was not specific to timestamped output mode. + +### 12. Rendered into the actual `completedFrame` + +Implementation: + +- Changed `PlayoutFrameCompleted()` to reuse the exact `completedFrame` passed by DeckLink rather than rotating an independent output-frame queue. + +Result: + +- No change. + +Conclusion: + +- The app was probably not overwriting a still-in-use frame from its output queue. + +### 13. Scheduled generated silence instead of captured audio + +Config: + +```json +"audioScheduleSilence": true +``` + +Result: + +- Occasional stutter. +- No video tearing. + +Conclusion: + +- Scheduling audio buffers itself can be stable if the audio data is zero. +- Non-zero audio data appears to be important. + +### 14. 
Flattened captured audio into PCM FIFO and scheduled fixed chunks + +Implementation: + +- Captured packets were flattened into a PCM FIFO. +- DeckLink received fixed 10 ms chunks rather than original packet boundaries. +- Missing audio was padded with silence. + +Result: + +- Video tearing returned. +- Audio stutter/tearing returned. + +Conclusion: + +- Packet boundaries/timestamps were not the whole cause. +- Non-zero captured audio data still triggered instability. + +### 15. Scheduled generated 440 Hz tone + +Config: + +```json +"audioScheduleTone": true +``` + +Result: + +- Video tearing occurred. +- Tone/audio also tore. + +Conclusion: + +- The issue is not specific to captured input data. +- Non-zero scheduled audio, even generated tone, triggers the problem. + +### 16. Changed DeckLink output to 16 embedded audio channels + +Implementation: + +- Enabled DeckLink audio output with 16 channels instead of 2. +- Mapped stereo to channels 1/2. +- Filled channels 3-16 with silence. + +Result: + +- Video tearing and audio tearing still occurred. + +Conclusion: + +- The issue is not simply caused by 2-channel embedded audio output. + +### 17. Used DeckLink-owned output video frames with audio enabled + +Implementation: + +- When audio output is enabled: + - disabled fast transfer path + - created output frames with `CreateVideoFrame()` + - avoided `CreateVideoFrameWithBuffer()` and the custom pinned playout allocator + +Result: + +- Video tearing and audio tearing still occurred. + +Conclusion: + +- The custom pinned output video buffers are likely not the root cause. + +## Current Strong Conclusions + +- Shader stack is not the cause. +- Preview/render output is not showing the issue, so the artifact is SDI/output-side. +- DeckLink audio input is not the cause. +- DeckLink audio output enabled but unscheduled does not cause tearing. +- `ScheduleAudioSamples()` with zero/silent buffers does not cause tearing. 
+- `ScheduleAudioSamples()` with non-zero audio causes both video tearing and audio tearing. +- The problem persists across: + - timestamped audio output + - continuous audio output + - captured audio + - generated tone + - 2-channel output + - 16-channel embedded output + - app-owned/pinned output video buffers + - DeckLink-owned output video frames + +## Current Hypothesis + +The issue appears to be a DeckLink output interaction where non-zero embedded audio samples disturb SDI video/audio output in this app’s scheduling model. + +Since scheduled silence does not tear but a generated tone does, the next likely areas to investigate are: + +- Audio sample format/range/endian expectations. +- Whether DeckLink expects 32-bit audio samples to be in a different effective range than we are providing. +- Whether the scheduled audio buffer layout for the selected hardware/output mode differs from our assumptions. +- Whether the selected output mode/keyer/SDI configuration has constraints when non-zero embedded audio is present. +- Whether the SDK samples (`SignalGenerator`, `InputLoopThrough`) behave correctly on the same hardware with a generated tone and the same video mode. + +## Suggested Next Tests + +1. Schedule very low-amplitude non-zero audio, e.g. constant `1`, then `256`, then a very quiet sine. +2. Try 16-bit audio output instead of 32-bit, if supported. +3. Try `bmdAudioOutputStreamContinuousDontResample`. +4. Disable external keying and test with non-zero audio. +5. Build/run the SDK `SignalGenerator` or `InputLoopThrough` sample on the same DeckLink device, video mode, and SDI output path with non-zero embedded audio. +6. Add instrumentation for DeckLink status/errors around scheduled video/audio completion. +7. Confirm the Desktop Video Setup panel audio/SDI settings for the selected output. + +## Current Config At Time Of Note + +```json +"audioEnabled": true, +"audioOutputEnabled": true, +"audioScheduleEnabled": true, +"audioPrerollEnabled": true, +"audioScheduleSilence": false, +"audioScheduleTone": false +```