Audio experiments

This commit is contained in:
2026-05-05 12:18:42 +10:00
parent f836c53d10
commit 9a8748687a
6 changed files with 719 additions and 80 deletions

View File

@@ -44,6 +44,7 @@
#include "OscServer.h"
#include <algorithm>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <cctype>
@@ -51,6 +52,7 @@
#include <set>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
#include <initguid.h>
@@ -66,6 +68,8 @@ constexpr GLuint kPackedVideoTextureUnit = 2;
constexpr GLuint kGlobalParamsBindingPoint = 0;
constexpr unsigned kVideoPrerollFrameCount = 5;
constexpr unsigned kAudioOutputWaterLevelSampleFrames = kAudioSampleRate / 2;
constexpr unsigned kAudioScheduleChunkSampleFrames = kAudioSampleRate / 100;
constexpr unsigned kDeckLinkOutputAudioChannelCount = 16;
#ifndef GL_RGBA32F
#define GL_RGBA32F 0x8814
@@ -325,7 +329,7 @@ OpenGLComposite::OpenGLComposite(HWND hWnd, HDC hDC, HGLRC hRC) :
mDLInput(NULL), mDLOutput(NULL), mDLInputConfiguration(NULL), mDLKeyer(NULL),
mPlayoutAllocator(NULL),
mTotalPlayoutFrames(0),
mNextAudioSampleFrame(0),
mAudioOutputSampleTime(0),
mInputFrameWidth(0), mInputFrameHeight(0),
mOutputFrameWidth(0), mOutputFrameHeight(0),
mInputDisplayModeName("1080p59.94"),
@@ -358,7 +362,14 @@ OpenGLComposite::OpenGLComposite(HWND hWnd, HDC hDC, HGLRC hRC) :
mViewWidth(0),
mViewHeight(0),
mAudioEnabled(false),
mAudioOutputEnabled(false),
mAudioScheduleEnabled(false),
mAudioPrerollEnabled(false),
mAudioScheduleSilence(false),
mAudioScheduleTone(false),
mAudioPrerolling(false),
mAudioSchedulerRunning(false),
mPlayoutCallbackActive(false),
mTemporalHistoryNeedsReset(true)
{
InitializeCriticalSection(&pMutex);
@@ -369,6 +380,11 @@ OpenGLComposite::OpenGLComposite(HWND hWnd, HDC hDC, HGLRC hRC) :
OpenGLComposite::~OpenGLComposite()
{
mAudioSchedulerRunning.store(false);
mAudioPacketQueued.notify_all();
if (mAudioSchedulerThread.joinable())
mAudioSchedulerThread.join();
// Cleanup for Capture
if (mDLInput != NULL)
{
@@ -638,6 +654,11 @@ bool OpenGLComposite::InitDeckLink()
if (! CheckOpenGLExtensions())
goto error;
if (mAudioOutputEnabled)
{
mFastTransferExtensionAvailable = false;
OutputDebugStringA("Audio output enabled; using DeckLink-owned output video frames for SDI stability.\n");
}
if (mInputFrameWidth != mOutputFrameWidth || mInputFrameHeight != mOutputFrameHeight)
{
mFastTransferExtensionAvailable = false;
@@ -689,6 +710,11 @@ bool OpenGLComposite::InitDeckLink()
}
mAudioEnabled = mRuntimeHost ? mRuntimeHost->AudioEnabled() : true;
mAudioOutputEnabled = mAudioEnabled && (mRuntimeHost ? mRuntimeHost->AudioOutputEnabled() : true);
mAudioScheduleEnabled = mAudioOutputEnabled && (mRuntimeHost ? mRuntimeHost->AudioScheduleEnabled() : true);
mAudioPrerollEnabled = mAudioScheduleEnabled && (mRuntimeHost ? mRuntimeHost->AudioPrerollEnabled() : true);
mAudioScheduleSilence = mAudioScheduleEnabled && (mRuntimeHost ? mRuntimeHost->AudioScheduleSilence() : false);
mAudioScheduleTone = mAudioScheduleEnabled && (mRuntimeHost ? mRuntimeHost->AudioScheduleTone() : false);
if (mAudioEnabled)
{
if (mDLInput->QueryInterface(IID_IDeckLinkConfiguration, (void**)&mDLInputConfiguration) == S_OK && mDLInputConfiguration != NULL)
@@ -721,11 +747,10 @@ bool OpenGLComposite::InitDeckLink()
if (mDLOutput->EnableVideoOutput(outputDisplayMode, bmdVideoOutputFlagDefault) != S_OK)
goto error;
if (mAudioEnabled && mDLOutput->EnableAudioOutput(bmdAudioSampleRate48kHz, bmdAudioSampleType32bitInteger, kAudioChannelCount, bmdAudioOutputStreamTimestamped) != S_OK)
if (mAudioOutputEnabled && mDLOutput->EnableAudioOutput(bmdAudioSampleRate48kHz, bmdAudioSampleType32bitInteger, kDeckLinkOutputAudioChannelCount, bmdAudioOutputStreamContinuous) != S_OK)
{
OutputDebugStringA("Could not enable DeckLink audio output; continuing without audio.\n");
mDLInput->DisableAudioInput();
mAudioEnabled = false;
mAudioOutputEnabled = false;
}
if (mDLOutput->QueryInterface(IID_IDeckLinkKeyer, (void**)&mDLKeyer) == S_OK && mDLKeyer != NULL)
@@ -778,12 +803,23 @@ bool OpenGLComposite::InitDeckLink()
// If you want RGB 4:4:4 format to be played out "over the wire" in SDI, turn on the "Use 4:4:4 SDI" in the control
// panel or turn on the bmdDeckLinkConfig444SDIVideoOutput flag using the IDeckLinkConfiguration interface.
IDeckLinkMutableVideoFrame* outputFrame;
IDeckLinkVideoBuffer* outputFrameBuffer = NULL;
if (mAudioOutputEnabled)
{
if (mDLOutput->CreateVideoFrame(mOutputFrameWidth, mOutputFrameHeight, outputFrameRowBytes, bmdFormat8BitBGRA, bmdFrameFlagFlipVertical, &outputFrame) != S_OK)
goto error;
}
else
{
IDeckLinkVideoBuffer* outputFrameBuffer = NULL;
if (mPlayoutAllocator->AllocateVideoBuffer(&outputFrameBuffer) != S_OK)
goto error;
if (mPlayoutAllocator->AllocateVideoBuffer(&outputFrameBuffer) != S_OK)
goto error;
if (mDLOutput->CreateVideoFrameWithBuffer(mOutputFrameWidth, mOutputFrameHeight, outputFrameRowBytes, bmdFormat8BitBGRA, bmdFrameFlagFlipVertical, outputFrameBuffer, &outputFrame) != S_OK)
if (mDLOutput->CreateVideoFrameWithBuffer(mOutputFrameWidth, mOutputFrameHeight, outputFrameRowBytes, bmdFormat8BitBGRA, bmdFrameFlagFlipVertical, outputFrameBuffer, &outputFrame) != S_OK)
goto error;
}
if (outputFrame == NULL)
goto error;
mDLOutputVideoFrameQueue.push_back(outputFrame);
@@ -796,12 +832,11 @@ bool OpenGLComposite::InitDeckLink()
if (mDLOutput->SetScheduledFrameCompletionCallback(mPlayoutDelegate) != S_OK)
goto error;
if (mAudioEnabled && mDLOutput->SetAudioCallback(mPlayoutDelegate) != S_OK)
if (mAudioOutputEnabled && mDLOutput->SetAudioCallback(mPlayoutDelegate) != S_OK)
{
OutputDebugStringA("Could not set DeckLink audio output callback; continuing without audio.\n");
mDLInput->DisableAudioInput();
mDLOutput->DisableAudioOutput();
mAudioEnabled = false;
mAudioOutputEnabled = false;
}
bSuccess = true;
@@ -1217,16 +1252,48 @@ void OpenGLComposite::AudioPacketArrived(IDeckLinkAudioInputPacket* audioPacket)
if (sampleFrameCount <= 0)
return;
mAudioDelayBuffer.PushInterleaved(static_cast<const int32_t*>(audioBytes), static_cast<std::size_t>(sampleFrameCount));
updateAudioStatus();
TimestampedAudioPacket packet;
packet.block.interleavedSamples.assign(
static_cast<const int32_t*>(audioBytes),
static_cast<const int32_t*>(audioBytes) + (static_cast<std::size_t>(sampleFrameCount) * kAudioChannelCount));
if (!mAudioScheduleEnabled)
{
AudioAnalysisSnapshot audioAnalysis;
{
std::lock_guard<std::mutex> analyzerLock(mAudioAnalyzerMutex);
audioAnalysis = mAudioAnalyzer.Analyze(packet.block);
}
{
std::lock_guard<std::mutex> audioLock(mAudioStateMutex);
mAudioAnalysis = audioAnalysis;
}
updateAudioStatus();
return;
}
{
std::lock_guard<std::mutex> audioLock(mAudioStateMutex);
for (int32_t sample : packet.block.interleavedSamples)
mAudioSampleQueue.push_back(sample);
mQueuedAudioSampleFrames += static_cast<unsigned>(sampleFrameCount);
}
mAudioPacketQueued.notify_one();
ScheduleAudioToWaterLevel();
}
HRESULT OpenGLComposite::RenderAudioSamples(BOOL preroll)
{
if (!mAudioEnabled || !mDLOutput)
return ScheduleAudioToWaterLevel();
}
HRESULT OpenGLComposite::ScheduleAudioToWaterLevel()
{
if (!mAudioScheduleEnabled || !mDLOutput)
return S_OK;
std::lock_guard<std::mutex> audioLock(mAudioStateMutex);
if (mPlayoutCallbackActive.load(std::memory_order_acquire))
return S_FALSE;
unsigned bufferedSampleFrames = 0;
if (mDLOutput->GetBufferedAudioSampleFrameCount(&bufferedSampleFrames) != S_OK)
@@ -1235,24 +1302,88 @@ HRESULT OpenGLComposite::RenderAudioSamples(BOOL preroll)
return E_FAIL;
}
const unsigned delayedSampleFrames = delayedAudioSampleFrames();
const unsigned waterLevel = kAudioOutputWaterLevelSampleFrames > delayedSampleFrames
? kAudioOutputWaterLevelSampleFrames
: delayedSampleFrames;
if (bufferedSampleFrames >= waterLevel)
const unsigned audioWaterLevel = static_cast<unsigned>(AudioSampleTimeForVideoFrame(kVideoPrerollFrameCount, mFrameDuration, mFrameTimescale));
if (bufferedSampleFrames >= audioWaterLevel)
return S_OK;
const unsigned requestedSampleFrames = waterLevel - bufferedSampleFrames;
bool underrun = false;
AudioFrameBlock audioBlock = mAudioDelayBuffer.Pop(requestedSampleFrames, underrun);
mAudioAnalysis = mAudioAnalyzer.Analyze(audioBlock);
TimestampedAudioPacket packet;
bool poppedCapturedAudio = false;
{
std::unique_lock<std::mutex> audioLock(mAudioStateMutex, std::try_to_lock);
if (!audioLock.owns_lock())
return S_FALSE;
const unsigned audioDeficitFrames = audioWaterLevel - bufferedSampleFrames;
const unsigned requestedFrames = audioDeficitFrames < kAudioScheduleChunkSampleFrames ? audioDeficitFrames : kAudioScheduleChunkSampleFrames;
if (requestedFrames == 0)
return S_OK;
if (mAudioScheduleTone)
{
const std::size_t requestedSamples = static_cast<std::size_t>(requestedFrames) * kAudioChannelCount;
packet.block.interleavedSamples.reserve(requestedSamples);
for (unsigned frame = 0; frame < requestedFrames; ++frame)
{
const double phase = (static_cast<double>(mAudioToneSampleIndex++) * 440.0 * 6.28318530717958647692) / static_cast<double>(kAudioSampleRate);
const int32_t sample = static_cast<int32_t>(std::sin(phase) * 0.125 * 2147483647.0);
for (unsigned channel = 0; channel < kAudioChannelCount; ++channel)
packet.block.interleavedSamples.push_back(sample);
}
}
else if (mAudioScheduleSilence)
{
packet.block.interleavedSamples.assign(static_cast<std::size_t>(requestedFrames) * kAudioChannelCount, 0);
}
else
{
const std::size_t requestedSamples = static_cast<std::size_t>(requestedFrames) * kAudioChannelCount;
packet.block.interleavedSamples.reserve(requestedSamples);
while (!mAudioSampleQueue.empty() && packet.block.interleavedSamples.size() < requestedSamples)
{
packet.block.interleavedSamples.push_back(mAudioSampleQueue.front());
mAudioSampleQueue.pop_front();
}
if (packet.block.interleavedSamples.size() < requestedSamples)
{
mAudioUnderrunCount++;
packet.block.interleavedSamples.resize(requestedSamples, 0);
}
const auto frameCount = static_cast<unsigned>(packet.block.frameCount());
mQueuedAudioSampleFrames = frameCount <= mQueuedAudioSampleFrames ? mQueuedAudioSampleFrames - frameCount : 0;
poppedCapturedAudio = true;
}
}
const unsigned sampleFrames = static_cast<unsigned>(packet.block.frameCount());
if (sampleFrames == 0)
return S_FALSE;
std::vector<int32_t> deckLinkAudioSamples(static_cast<std::size_t>(sampleFrames) * kDeckLinkOutputAudioChannelCount, 0);
for (unsigned frame = 0; frame < sampleFrames; ++frame)
{
const std::size_t source = static_cast<std::size_t>(frame) * kAudioChannelCount;
const std::size_t destination = static_cast<std::size_t>(frame) * kDeckLinkOutputAudioChannelCount;
deckLinkAudioSamples[destination] = packet.block.interleavedSamples[source];
deckLinkAudioSamples[destination + 1] = packet.block.interleavedSamples[source + 1];
}
if (mPlayoutCallbackActive.load(std::memory_order_acquire))
{
std::lock_guard<std::mutex> audioLock(mAudioStateMutex);
if (poppedCapturedAudio)
{
for (auto it = packet.block.interleavedSamples.rbegin(); it != packet.block.interleavedSamples.rend(); ++it)
mAudioSampleQueue.push_front(*it);
mQueuedAudioSampleFrames += sampleFrames;
}
return S_FALSE;
}
unsigned sampleFramesWritten = 0;
const unsigned sampleFrames = static_cast<unsigned>(audioBlock.frameCount());
const HRESULT scheduleResult = mDLOutput->ScheduleAudioSamples(
audioBlock.interleavedSamples.data(),
HRESULT scheduleResult = mDLOutput->ScheduleAudioSamples(
deckLinkAudioSamples.data(),
sampleFrames,
static_cast<BMDTimeValue>(mNextAudioSampleFrame),
static_cast<BMDTimeValue>(mAudioOutputSampleTime),
kAudioSampleRate,
&sampleFramesWritten);
@@ -1260,41 +1391,69 @@ HRESULT OpenGLComposite::RenderAudioSamples(BOOL preroll)
{
if (sampleFramesWritten == 0 && sampleFrames > 0)
OutputDebugStringA("DeckLink accepted audio schedule call but wrote 0 sample frames.\n");
mNextAudioSampleFrame += sampleFramesWritten;
mAudioOutputSampleTime += sampleFramesWritten;
AudioFrameBlock analysisBlock = packet.block;
AudioAnalysisSnapshot audioAnalysis;
{
std::lock_guard<std::mutex> analyzerLock(mAudioAnalyzerMutex);
audioAnalysis = mAudioAnalyzer.Analyze(analysisBlock);
}
{
std::lock_guard<std::mutex> audioLock(mAudioStateMutex);
mAudioAnalysis = audioAnalysis;
packet.scheduledOutputSamples = std::move(deckLinkAudioSamples);
mScheduledAudioPacketRetainQueue.push_back(std::move(packet));
while (mScheduledAudioPacketRetainQueue.size() > 64)
mScheduledAudioPacketRetainQueue.pop_front();
}
updateAudioStatus();
}
else
{
OutputDebugStringA("DeckLink ScheduleAudioSamples failed while topping up audio output.\n");
}
updateAudioStatus();
return scheduleResult;
}
// Worker loop for the audio scheduler thread.
//
// While mAudioSchedulerRunning is set, each pass first tops up the output via
// ScheduleAudioToWaterLevel() (called without mAudioStateMutex held), then parks on
// mAudioPacketQueued for at most 20 ms. The park ends early only when a notification
// arrives AND either the running flag has been cleared or mAudioPacketQueue is non-empty.
//
// NOTE(review): the early wake-up depends on mAudioPacketQueue being filled somewhere;
// confirm a producer actually pushes to that queue, otherwise notifications while running
// cannot end the wait early and the loop effectively polls every 20 ms.
void OpenGLComposite::AudioSchedulingLoop()
{
	const auto idleTimeout = std::chrono::milliseconds(20);

	// Evaluated by wait_for with mAudioStateMutex held.
	const auto shouldWake = [this]() -> bool
	{
		if (!mAudioSchedulerRunning.load())
			return true;
		return !mAudioPacketQueue.empty();
	};

	for (;;)
	{
		if (!mAudioSchedulerRunning.load())
			break;

		ScheduleAudioToWaterLevel();

		// The lock lives only for the duration of the wait; it is released at the end of
		// the iteration, before the next scheduling pass.
		std::unique_lock<std::mutex> stateLock(mAudioStateMutex);
		mAudioPacketQueued.wait_for(stateLock, idleTimeout, shouldWake);
	}
}
// Render the live video texture through the runtime shader into the off-screen framebuffer.
// Read the result back from the frame buffer and schedule it for playout.
void OpenGLComposite::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, BMDOutputFrameCompletionResult completionResult)
{
mPlayoutCallbackActive.store(true, std::memory_order_release);
EnterCriticalSection(&pMutex);
auto leavePlayoutCallback = [this]()
{
mPlayoutCallbackActive.store(false, std::memory_order_release);
LeaveCriticalSection(&pMutex);
};
// Get the first frame from the queue
IDeckLinkMutableVideoFrame* outputVideoFrame = mDLOutputVideoFrameQueue.front();
mDLOutputVideoFrameQueue.push_back(outputVideoFrame);
mDLOutputVideoFrameQueue.pop_front();
if (!completedFrame)
{
leavePlayoutCallback();
return;
}
IDeckLinkVideoFrame* outputVideoFrame = completedFrame;
// make GL context current in this thread
wglMakeCurrent( hGLDC, hGLRC );
if (mAudioEnabled)
{
AudioAnalysisSnapshot audioAnalysis;
{
std::lock_guard<std::mutex> audioLock(mAudioStateMutex);
audioAnalysis = mAudioAnalysis;
}
updateAudioDataTexture(audioAnalysis);
}
// Draw the effect output to the off-screen framebuffer.
const auto renderStartTime = std::chrono::steady_clock::now();
if (mFastTransferExtensionAvailable)
@@ -1323,14 +1482,14 @@ void OpenGLComposite::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame,
IDeckLinkVideoBuffer* outputVideoFrameBuffer;
if (outputVideoFrame->QueryInterface(IID_IDeckLinkVideoBuffer, (void**)&outputVideoFrameBuffer) != S_OK)
{
LeaveCriticalSection(&pMutex);
leavePlayoutCallback();
return;
}
if (outputVideoFrameBuffer->StartAccess(bmdBufferAccessWrite) != S_OK)
{
outputVideoFrameBuffer->Release();
LeaveCriticalSection(&pMutex);
leavePlayoutCallback();
return;
}
@@ -1371,22 +1530,21 @@ void OpenGLComposite::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame,
wglMakeCurrent( NULL, NULL );
LeaveCriticalSection(&pMutex);
leavePlayoutCallback();
}
bool OpenGLComposite::Start()
{
mTotalPlayoutFrames = 0;
initializeAudioDelay();
if (mAudioEnabled)
if (mAudioPrerollEnabled)
{
mDLOutput->FlushBufferedAudioSamples();
if (mDLOutput->BeginAudioPreroll() != S_OK)
{
OutputDebugStringA("Could not begin DeckLink audio preroll; continuing without audio.\n");
mDLInput->DisableAudioInput();
mDLOutput->DisableAudioOutput();
mAudioEnabled = false;
mAudioOutputEnabled = false;
}
else
{
@@ -1426,26 +1584,37 @@ bool OpenGLComposite::Start()
mTotalPlayoutFrames++;
}
if (mAudioEnabled)
RenderAudioSamples(TRUE);
if (mDLInput->StartStreams() != S_OK)
{
return false;
}
if (mAudioPrerolling)
{
const unsigned audioWaterLevel = static_cast<unsigned>(AudioSampleTimeForVideoFrame(kVideoPrerollFrameCount, mFrameDuration, mFrameTimescale));
const auto prerollDeadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(750);
while (mAudioScheduleEnabled && std::chrono::steady_clock::now() < prerollDeadline)
{
unsigned bufferedSampleFrames = 0;
if (mDLOutput->GetBufferedAudioSampleFrameCount(&bufferedSampleFrames) == S_OK && bufferedSampleFrames >= audioWaterLevel)
break;
std::this_thread::sleep_for(std::chrono::milliseconds(5));
}
if (mDLOutput->EndAudioPreroll() != S_OK)
{
OutputDebugStringA("Could not end DeckLink audio preroll; continuing without audio.\n");
mDLInput->DisableAudioInput();
mDLOutput->DisableAudioOutput();
mAudioEnabled = false;
mAudioOutputEnabled = false;
mAudioScheduleEnabled = false;
}
mAudioPrerolling = false;
}
if (mDLInput->StartStreams() != S_OK)
return false;
if (mDLOutput->StartScheduledPlayback(0, mFrameTimescale, 1.0) != S_OK)
{
return false;
}
return true;
}
@@ -1475,6 +1644,11 @@ bool OpenGLComposite::Stop()
}
}
mAudioSchedulerRunning.store(false);
mAudioPacketQueued.notify_all();
if (mAudioSchedulerThread.joinable())
mAudioSchedulerThread.join();
mDLInput->StopStreams();
mDLInput->DisableVideoInput();
if (mAudioEnabled)
@@ -1485,7 +1659,7 @@ bool OpenGLComposite::Stop()
mDLOutput->SetScheduledFrameCompletionCallback(NULL);
mDLOutput->DisableVideoOutput();
mAudioPrerolling = false;
if (mAudioEnabled)
if (mAudioOutputEnabled)
mDLOutput->DisableAudioOutput();
return true;
@@ -2260,30 +2434,33 @@ void OpenGLComposite::broadcastRuntimeState()
mControlServer->BroadcastState();
}
unsigned OpenGLComposite::delayedAudioSampleFrames() const
BMDTimeValue OpenGLComposite::delayedAudioStreamTime() const
{
return static_cast<unsigned>(AudioSampleTimeForVideoFrame(kVideoPrerollFrameCount, mFrameDuration, mFrameTimescale));
return static_cast<BMDTimeValue>(kVideoPrerollFrameCount) * mFrameDuration;
}
void OpenGLComposite::initializeAudioDelay()
{
std::lock_guard<std::mutex> audioLock(mAudioStateMutex);
mAudioAnalyzer.Reset();
mAudioAnalysis = AudioAnalysisSnapshot();
mAudioDelayBuffer.Reset(delayedAudioSampleFrames());
mNextAudioSampleFrame = 0;
{
std::lock_guard<std::mutex> analyzerLock(mAudioAnalyzerMutex);
mAudioAnalyzer.Reset();
}
{
std::lock_guard<std::mutex> audioLock(mAudioStateMutex);
mAudioAnalysis = AudioAnalysisSnapshot();
mAudioPacketQueue.clear();
mScheduledAudioPacketRetainQueue.clear();
mAudioSampleQueue.clear();
mQueuedAudioSampleFrames = 0;
mAudioUnderrunCount = 0;
mAudioOutputSampleTime = 0;
mAudioToneSampleIndex = 0;
mHasFirstAudioPacketTime = false;
mFirstAudioPacketTime = 0;
}
updateAudioStatus();
}
// Pops the audio for one video frame from mAudioDelayBuffer and refreshes mAudioAnalysis.
//
// videoFrameIndex is handed to AudioSamplesForVideoFrame() together with mFrameDuration and
// mFrameTimescale to obtain the number of sample frames to pop.
// Returns the popped block; as a side effect mAudioAnalysis is overwritten with the result
// of mAudioAnalyzer.Analyze() for that block.
//
// NOTE(review): no lock is taken in this function around mAudioDelayBuffer, mAudioAnalyzer
// or mAudioAnalysis — confirm that callers serialize access.
AudioFrameBlock OpenGLComposite::popAudioForVideoFrame(uint64_t videoFrameIndex)
{
const unsigned sampleFrames = AudioSamplesForVideoFrame(videoFrameIndex, mFrameDuration, mFrameTimescale);
// Passed to Pop() as an out-argument (presumably set on underrun — verify); its value is not inspected here.
bool underrun = false;
AudioFrameBlock block = mAudioDelayBuffer.Pop(sampleFrames, underrun);
mAudioAnalysis = mAudioAnalyzer.Analyze(block);
return block;
}
void OpenGLComposite::updateAudioDataTexture(const AudioAnalysisSnapshot& analysis)
{
if (mAudioDataTexture == 0)
@@ -2303,8 +2480,8 @@ void OpenGLComposite::updateAudioStatus()
AudioStatusSnapshot status;
status.enabled = mAudioEnabled;
status.bufferedSampleFrames = mAudioDelayBuffer.BufferedSampleFrames();
status.underrunCount = mAudioDelayBuffer.UnderrunCount();
status.bufferedSampleFrames = mQueuedAudioSampleFrames;
status.underrunCount = mAudioUnderrunCount;
status.analysis = mAudioAnalysis;
mRuntimeHost->SetAudioStatus(status);
}