Initial audio support

This commit is contained in:
2026-05-04 14:32:29 +10:00
parent 44316b29c2
commit f836c53d10
17 changed files with 977 additions and 10 deletions

View File

@@ -60,9 +60,16 @@ DEFINE_GUID(IID_PinnedMemoryAllocator,
namespace
{
constexpr GLuint kDecodedVideoTextureUnit = 1;
constexpr GLuint kSourceHistoryTextureUnitBase = 2;
constexpr GLuint kAudioDataTextureUnit = 2;
constexpr GLuint kSourceHistoryTextureUnitBase = 3;
constexpr GLuint kPackedVideoTextureUnit = 2;
constexpr GLuint kGlobalParamsBindingPoint = 0;
constexpr unsigned kVideoPrerollFrameCount = 5;
constexpr unsigned kAudioOutputWaterLevelSampleFrames = kAudioSampleRate / 2;
#ifndef GL_RGBA32F
#define GL_RGBA32F 0x8814
#endif
const char* kVertexShaderSource =
"#version 430 core\n"
"out vec2 vTexCoord;\n"
@@ -315,8 +322,10 @@ void AppendStd140Vec4(std::vector<unsigned char>& buffer, float x, float y, floa
OpenGLComposite::OpenGLComposite(HWND hWnd, HDC hDC, HGLRC hRC) :
hGLWnd(hWnd), hGLDC(hDC), hGLRC(hRC),
mCaptureDelegate(NULL), mPlayoutDelegate(NULL),
mDLInput(NULL), mDLOutput(NULL), mDLKeyer(NULL),
mDLInput(NULL), mDLOutput(NULL), mDLInputConfiguration(NULL), mDLKeyer(NULL),
mPlayoutAllocator(NULL),
mTotalPlayoutFrames(0),
mNextAudioSampleFrame(0),
mInputFrameWidth(0), mInputFrameHeight(0),
mOutputFrameWidth(0), mOutputFrameHeight(0),
mInputDisplayModeName("1080p59.94"),
@@ -332,6 +341,7 @@ OpenGLComposite::OpenGLComposite(HWND hWnd, HDC hDC, HGLRC hRC) :
mLayerTempTexture(0),
mFBOTexture(0),
mOutputTexture(0),
mAudioDataTexture(0),
mUnpinnedTextureBuffer(0),
mDecodeFrameBuf(0),
mLayerTempFrameBuf(0),
@@ -347,6 +357,8 @@ OpenGLComposite::OpenGLComposite(HWND hWnd, HDC hDC, HGLRC hRC) :
mGlobalParamsUBOSize(0),
mViewWidth(0),
mViewHeight(0),
mAudioEnabled(false),
mAudioPrerolling(false),
mTemporalHistoryNeedsReset(true)
{
InitializeCriticalSection(&pMutex);
@@ -362,6 +374,12 @@ OpenGLComposite::~OpenGLComposite()
{
mDLInput->SetCallback(NULL);
if (mDLInputConfiguration != NULL)
{
mDLInputConfiguration->Release();
mDLInputConfiguration = NULL;
}
mDLInput->Release();
mDLInput = NULL;
}
@@ -394,6 +412,7 @@ OpenGLComposite::~OpenGLComposite()
}
mDLOutput->SetScheduledFrameCompletionCallback(NULL);
mDLOutput->SetAudioCallback(NULL);
mDLOutput->Release();
mDLOutput = NULL;
@@ -435,6 +454,8 @@ OpenGLComposite::~OpenGLComposite()
glDeleteTextures(1, &mFBOTexture);
if (mOutputTexture != 0)
glDeleteTextures(1, &mOutputTexture);
if (mAudioDataTexture != 0)
glDeleteTextures(1, &mAudioDataTexture);
if (mOutputFrameBuf != 0)
glDeleteFramebuffers(1, &mOutputFrameBuf);
if (mUnpinnedTextureBuffer != 0)
@@ -667,6 +688,26 @@ bool OpenGLComposite::InitDeckLink()
goto error;
}
mAudioEnabled = mRuntimeHost ? mRuntimeHost->AudioEnabled() : true;
if (mAudioEnabled)
{
if (mDLInput->QueryInterface(IID_IDeckLinkConfiguration, (void**)&mDLInputConfiguration) == S_OK && mDLInputConfiguration != NULL)
{
if (mDLInputConfiguration->SetInt(bmdDeckLinkConfigAudioInputConnection, bmdAudioConnectionEmbedded) != S_OK)
OutputDebugStringA("Could not force DeckLink audio input connection to embedded; using current device setting.\n");
}
else
{
OutputDebugStringA("Could not query DeckLink input configuration; using current audio input connection.\n");
}
}
if (mAudioEnabled && mDLInput->EnableAudioInput(bmdAudioSampleRate48kHz, bmdAudioSampleType32bitInteger, kAudioChannelCount) != S_OK)
{
OutputDebugStringA("Could not enable DeckLink audio input; continuing without audio.\n");
mAudioEnabled = false;
}
mCaptureDelegate = new CaptureDelegate(this);
if (mDLInput->SetCallback(mCaptureDelegate) != S_OK)
goto error;
@@ -680,6 +721,13 @@ bool OpenGLComposite::InitDeckLink()
if (mDLOutput->EnableVideoOutput(outputDisplayMode, bmdVideoOutputFlagDefault) != S_OK)
goto error;
if (mAudioEnabled && mDLOutput->EnableAudioOutput(bmdAudioSampleRate48kHz, bmdAudioSampleType32bitInteger, kAudioChannelCount, bmdAudioOutputStreamTimestamped) != S_OK)
{
OutputDebugStringA("Could not enable DeckLink audio output; continuing without audio.\n");
mDLInput->DisableAudioInput();
mAudioEnabled = false;
}
if (mDLOutput->QueryInterface(IID_IDeckLinkKeyer, (void**)&mDLKeyer) == S_OK && mDLKeyer != NULL)
mDeckLinkKeyerInterfaceAvailable = true;
@@ -748,6 +796,14 @@ bool OpenGLComposite::InitDeckLink()
if (mDLOutput->SetScheduledFrameCompletionCallback(mPlayoutDelegate) != S_OK)
goto error;
if (mAudioEnabled && mDLOutput->SetAudioCallback(mPlayoutDelegate) != S_OK)
{
OutputDebugStringA("Could not set DeckLink audio output callback; continuing without audio.\n");
mDLInput->DisableAudioInput();
mDLOutput->DisableAudioOutput();
mAudioEnabled = false;
}
bSuccess = true;
error:
@@ -770,6 +826,11 @@ error:
mDLOutput->Release();
mDLOutput = NULL;
}
if (mDLInputConfiguration != NULL)
{
mDLInputConfiguration->Release();
mDLInputConfiguration = NULL;
}
}
if (pDL != NULL)
@@ -1052,6 +1113,14 @@ bool OpenGLComposite::InitOpenGLState()
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, mOutputFrameWidth, mOutputFrameHeight, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
glGenTextures(1, &mAudioDataTexture);
glBindTexture(GL_TEXTURE_2D, mAudioDataTexture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, kAudioTextureWidth, kAudioTextureHeight, 0, GL_RGBA, GL_FLOAT, mAudioAnalysis.texture.data());
glBindFramebuffer(GL_FRAMEBUFFER, mOutputFrameBuf);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mOutputTexture, 0);
glStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER);
@@ -1135,6 +1204,73 @@ void OpenGLComposite::VideoFrameArrived(IDeckLinkVideoInputFrame* inputFrame, bo
inputFrameBuffer->Release();
}
// Accepts one captured DeckLink audio packet and appends its interleaved samples to the
// audio delay buffer, then republishes the audio status to the runtime host.
//
// The call is a no-op when audio is disabled, when audioPacket is null, when the packet's
// byte pointer cannot be obtained, or when the packet reports no sample frames.
//
// The bytes are read as int32_t samples; audio input is enabled with
// bmdAudioSampleType32bitInteger in InitDeckLink().
//
// The buffer push and the status refresh run under mAudioStateMutex. RenderAudioSamples()
// and initializeAudioDelay() hold that same mutex while they pop from / reset the delay
// buffer and replace mAudioAnalysis, so this function must not touch that state unlocked.
void OpenGLComposite::AudioPacketArrived(IDeckLinkAudioInputPacket* audioPacket)
{
    if (!mAudioEnabled || !audioPacket)
        return;

    void* audioBytes = nullptr;
    if (audioPacket->GetBytes(&audioBytes) != S_OK || !audioBytes)
        return;

    const long sampleFrameCount = audioPacket->GetSampleFrameCount();
    if (sampleFrameCount <= 0)
        return;

    // updateAudioStatus() takes no lock of its own (its other callers already hold the
    // mutex), so it is called inside the guarded region here as well.
    std::lock_guard<std::mutex> audioLock(mAudioStateMutex);
    mAudioDelayBuffer.PushInterleaved(static_cast<const int32_t*>(audioBytes), static_cast<std::size_t>(sampleFrameCount));
    updateAudioStatus();
}
// Tops up the DeckLink audio output queue from the audio delay buffer.
//
// The target fill level ("water level") is the larger of kAudioOutputWaterLevelSampleFrames
// and delayedAudioSampleFrames(). When the device already buffers at least that many sample
// frames nothing is scheduled. Otherwise the shortfall is popped from the delay buffer,
// analysed into mAudioAnalysis, and scheduled at stream time mNextAudioSampleFrame, which
// then advances by the number of sample frames the device reports as written.
//
// preroll: not consulted by this implementation.
//
// Returns S_OK when audio is disabled, there is no output, or the queue is already full
// enough; E_FAIL when the buffered sample count cannot be queried; otherwise the HRESULT
// of ScheduleAudioSamples.
HRESULT OpenGLComposite::RenderAudioSamples(BOOL preroll)
{
    if (!mAudioEnabled || !mDLOutput)
        return S_OK;

    // Delay buffer, analysis snapshot and stream position are all guarded by this mutex.
    std::lock_guard<std::mutex> audioLock(mAudioStateMutex);

    unsigned queuedFrames = 0;
    if (mDLOutput->GetBufferedAudioSampleFrameCount(&queuedFrames) != S_OK)
    {
        OutputDebugStringA("Could not query DeckLink buffered audio sample count.\n");
        return E_FAIL;
    }

    // Target level = max(fixed water level, configured delay).
    unsigned targetFrames = delayedAudioSampleFrames();
    if (kAudioOutputWaterLevelSampleFrames > targetFrames)
        targetFrames = kAudioOutputWaterLevelSampleFrames;

    if (queuedFrames >= targetFrames)
        return S_OK;

    const unsigned shortfallFrames = targetFrames - queuedFrames;
    bool underrun = false;
    AudioFrameBlock block = mAudioDelayBuffer.Pop(shortfallFrames, underrun);
    mAudioAnalysis = mAudioAnalyzer.Analyze(block);

    const unsigned framesToSchedule = static_cast<unsigned>(block.frameCount());
    unsigned framesAccepted = 0;
    const HRESULT hr = mDLOutput->ScheduleAudioSamples(
        block.interleavedSamples.data(),
        framesToSchedule,
        static_cast<BMDTimeValue>(mNextAudioSampleFrame),
        kAudioSampleRate,
        &framesAccepted);

    if (hr != S_OK)
    {
        OutputDebugStringA("DeckLink ScheduleAudioSamples failed while topping up audio output.\n");
    }
    else
    {
        if (framesAccepted == 0 && framesToSchedule > 0)
            OutputDebugStringA("DeckLink accepted audio schedule call but wrote 0 sample frames.\n");
        // Only advance the stream position by what the device actually took.
        mNextAudioSampleFrame += framesAccepted;
    }

    updateAudioStatus();
    return hr;
}
// Render the live video texture through the runtime shader into the off-screen framebuffer.
// Read the result back from the frame buffer and schedule it for playout.
void OpenGLComposite::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, BMDOutputFrameCompletionResult completionResult)
@@ -1149,6 +1285,16 @@ void OpenGLComposite::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame,
// make GL context current in this thread
wglMakeCurrent( hGLDC, hGLRC );
if (mAudioEnabled)
{
AudioAnalysisSnapshot audioAnalysis;
{
std::lock_guard<std::mutex> audioLock(mAudioStateMutex);
audioAnalysis = mAudioAnalysis;
}
updateAudioDataTexture(audioAnalysis);
}
// Draw the effect output to the off-screen framebuffer.
const auto renderStartTime = std::chrono::steady_clock::now();
if (mFastTransferExtensionAvailable)
@@ -1231,9 +1377,25 @@ void OpenGLComposite::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame,
bool OpenGLComposite::Start()
{
mTotalPlayoutFrames = 0;
initializeAudioDelay();
if (mAudioEnabled)
{
mDLOutput->FlushBufferedAudioSamples();
if (mDLOutput->BeginAudioPreroll() != S_OK)
{
OutputDebugStringA("Could not begin DeckLink audio preroll; continuing without audio.\n");
mDLInput->DisableAudioInput();
mDLOutput->DisableAudioOutput();
mAudioEnabled = false;
}
else
{
mAudioPrerolling = true;
}
}
// Preroll frames
for (unsigned i = 0; i < 5; i++)
for (unsigned i = 0; i < kVideoPrerollFrameCount; i++)
{
// Take each video frame from the front of the queue and move it to the back
IDeckLinkMutableVideoFrame* outputVideoFrame = mDLOutputVideoFrameQueue.front();
@@ -1264,8 +1426,26 @@ bool OpenGLComposite::Start()
mTotalPlayoutFrames++;
}
mDLInput->StartStreams();
mDLOutput->StartScheduledPlayback(0, mFrameTimescale, 1.0);
if (mAudioEnabled)
RenderAudioSamples(TRUE);
if (mAudioPrerolling)
{
if (mDLOutput->EndAudioPreroll() != S_OK)
{
OutputDebugStringA("Could not end DeckLink audio preroll; continuing without audio.\n");
mDLInput->DisableAudioInput();
mDLOutput->DisableAudioOutput();
mAudioEnabled = false;
}
mAudioPrerolling = false;
}
if (mDLInput->StartStreams() != S_OK)
return false;
if (mDLOutput->StartScheduledPlayback(0, mFrameTimescale, 1.0) != S_OK)
return false;
return true;
}
@@ -1297,9 +1477,16 @@ bool OpenGLComposite::Stop()
mDLInput->StopStreams();
mDLInput->DisableVideoInput();
if (mAudioEnabled)
mDLInput->DisableAudioInput();
mDLOutput->StopScheduledPlayback(0, NULL, 0);
mDLOutput->SetAudioCallback(NULL);
mDLOutput->SetScheduledFrameCompletionCallback(NULL);
mDLOutput->DisableVideoOutput();
mAudioPrerolling = false;
if (mAudioEnabled)
mDLOutput->DisableAudioOutput();
return true;
}
@@ -1411,6 +1598,9 @@ bool OpenGLComposite::compileSingleLayerProgram(const RuntimeRenderState& state,
const GLint videoInputLocation = glGetUniformLocation(newProgram.get(), "gVideoInput");
if (videoInputLocation >= 0)
glUniform1i(videoInputLocation, static_cast<GLint>(kDecodedVideoTextureUnit));
const GLint audioDataLocation = glGetUniformLocation(newProgram.get(), "gAudioData");
if (audioDataLocation >= 0)
glUniform1i(audioDataLocation, static_cast<GLint>(kAudioDataTextureUnit));
for (unsigned index = 0; index < historyCap; ++index)
{
const std::string sourceSamplerName = "gSourceHistory" + std::to_string(index);
@@ -1973,6 +2163,8 @@ void OpenGLComposite::renderShaderProgram(GLuint sourceTexture, GLuint destinati
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glActiveTexture(GL_TEXTURE0 + kDecodedVideoTextureUnit);
glBindTexture(GL_TEXTURE_2D, sourceTexture);
glActiveTexture(GL_TEXTURE0 + kAudioDataTextureUnit);
glBindTexture(GL_TEXTURE_2D, mAudioDataTexture);
bindHistorySamplers(state, sourceTexture);
bindLayerTextureAssets(layerProgram);
glBindVertexArray(mFullscreenVAO);
@@ -1995,6 +2187,8 @@ void OpenGLComposite::renderShaderProgram(GLuint sourceTexture, GLuint destinati
glActiveTexture(GL_TEXTURE0 + shaderTextureBase + static_cast<GLuint>(index));
glBindTexture(GL_TEXTURE_2D, 0);
}
glActiveTexture(GL_TEXTURE0 + kAudioDataTextureUnit);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE0 + kDecodedVideoTextureUnit);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE0);
@@ -2066,6 +2260,55 @@ void OpenGLComposite::broadcastRuntimeState()
mControlServer->BroadcastState();
}
// Returns the value AudioSampleTimeForVideoFrame() reports for video frame index
// kVideoPrerollFrameCount at the current frame duration/timescale, converted to unsigned.
// NOTE(review): presumably this is the number of audio sample frames spanned by the video
// preroll — confirm against AudioSampleTimeForVideoFrame.
unsigned OpenGLComposite::delayedAudioSampleFrames() const
{
    const auto prerollSampleTime = AudioSampleTimeForVideoFrame(kVideoPrerollFrameCount, mFrameDuration, mFrameTimescale);
    return static_cast<unsigned>(prerollSampleTime);
}
// Resets all audio playout state under mAudioStateMutex: rewinds the scheduled-audio
// stream position to 0, resets the analyzer, replaces the analysis snapshot with a
// default-constructed one, resets the delay buffer with delayedAudioSampleFrames(), and
// republishes the audio status.
void OpenGLComposite::initializeAudioDelay()
{
    std::lock_guard<std::mutex> audioLock(mAudioStateMutex);

    mNextAudioSampleFrame = 0;

    mAudioAnalyzer.Reset();
    mAudioAnalysis = AudioAnalysisSnapshot();

    const unsigned initialDelayFrames = delayedAudioSampleFrames();
    mAudioDelayBuffer.Reset(initialDelayFrames);

    // Publish last, so the status reflects the freshly reset buffer and analysis.
    updateAudioStatus();
}
// Pops the block of audio belonging to one video frame from the delay buffer.
// The block size comes from AudioSamplesForVideoFrame() for videoFrameIndex at the current
// frame duration/timescale. The popped block is also analysed and the result stored in
// mAudioAnalysis. The underrun flag reported by Pop() is not used here.
//
// NOTE(review): this function does not take mAudioStateMutex itself although it touches
// mAudioDelayBuffer and mAudioAnalysis — confirm that callers hold the lock.
AudioFrameBlock OpenGLComposite::popAudioForVideoFrame(uint64_t videoFrameIndex)
{
    bool underrun = false;
    const unsigned framesForThisVideoFrame = AudioSamplesForVideoFrame(videoFrameIndex, mFrameDuration, mFrameTimescale);

    AudioFrameBlock popped = mAudioDelayBuffer.Pop(framesForThisVideoFrame, underrun);
    mAudioAnalysis = mAudioAnalyzer.Analyze(popped);
    return popped;
}
// Uploads analysis.texture into the audio data texture (full kAudioTextureWidth x
// kAudioTextureHeight region, RGBA float) via texture unit kAudioDataTextureUnit.
// Does nothing when the texture has not been created. On exit the texture is unbound and
// the active unit is restored to GL_TEXTURE0.
void OpenGLComposite::updateAudioDataTexture(const AudioAnalysisSnapshot& analysis)
{
    if (mAudioDataTexture != 0)
    {
        glActiveTexture(GL_TEXTURE0 + kAudioDataTextureUnit);
        glBindTexture(GL_TEXTURE_2D, mAudioDataTexture);
        glTexSubImage2D(
            GL_TEXTURE_2D, 0,
            0, 0, kAudioTextureWidth, kAudioTextureHeight,
            GL_RGBA, GL_FLOAT,
            analysis.texture.data());
        glBindTexture(GL_TEXTURE_2D, 0);
        glActiveTexture(GL_TEXTURE0);
    }
}
void OpenGLComposite::updateAudioStatus()
{
if (!mRuntimeHost)
return;
AudioStatusSnapshot status;
status.enabled = mAudioEnabled;
status.bufferedSampleFrames = mAudioDelayBuffer.BufferedSampleFrames();
status.underrunCount = mAudioDelayBuffer.UnderrunCount();
status.analysis = mAudioAnalysis;
mRuntimeHost->SetAudioStatus(status);
}
bool OpenGLComposite::updateGlobalParamsBuffer(const RuntimeRenderState& state, unsigned availableSourceHistoryLength, unsigned availableTemporalHistoryLength)
{
std::vector<unsigned char> buffer;
@@ -2085,6 +2328,15 @@ bool OpenGLComposite::updateGlobalParamsBuffer(const RuntimeRenderState& state,
: 0u;
AppendStd140Int(buffer, static_cast<int>(effectiveSourceHistoryLength));
AppendStd140Int(buffer, static_cast<int>(effectiveTemporalHistoryLength));
AppendStd140Vec2(buffer, state.audioAnalysis.rms[0], state.audioAnalysis.rms[1]);
AppendStd140Vec2(buffer, state.audioAnalysis.peak[0], state.audioAnalysis.peak[1]);
AppendStd140Float(buffer, state.audioAnalysis.monoRms);
AppendStd140Float(buffer, state.audioAnalysis.monoPeak);
AppendStd140Vec4(buffer,
state.audioAnalysis.bands[0],
state.audioAnalysis.bands[1],
state.audioAnalysis.bands[2],
state.audioAnalysis.bands[3]);
for (const ShaderParameterDefinition& definition : state.parameterDefinitions)
{
@@ -2623,11 +2875,14 @@ ULONG CaptureDelegate::Release()
return newCount;
}
HRESULT CaptureDelegate::VideoInputFrameArrived(IDeckLinkVideoInputFrame* inputFrame, IDeckLinkAudioInputPacket* /*audioPacket*/)
HRESULT CaptureDelegate::VideoInputFrameArrived(IDeckLinkVideoInputFrame* inputFrame, IDeckLinkAudioInputPacket* audioPacket)
{
if (audioPacket)
m_pOwner->AudioPacketArrived(audioPacket);
if (! inputFrame)
{
// It's possible to receive a NULL inputFrame, but a valid audioPacket. Ignore audio-only frame.
// It's possible to receive a NULL inputFrame, but a valid audioPacket.
return S_OK;
}
@@ -2653,6 +2908,23 @@ PlayoutDelegate::PlayoutDelegate(OpenGLComposite* pOwner) :
// COM interface lookup for the playout delegate.
// Supports IUnknown and IDeckLinkVideoOutputCallback (both returned through the video
// callback base) and IDeckLinkAudioOutputCallback. A successful lookup adds a reference.
//
// Returns E_POINTER when ppv is null, E_NOINTERFACE (with *ppv set to NULL) for any other
// interface id, and S_OK otherwise.
HRESULT PlayoutDelegate::QueryInterface(REFIID iid, LPVOID *ppv)
{
    if (ppv == nullptr)
        return E_POINTER;

    // Start from "not supported" and overwrite when the id matches.
    *ppv = NULL;

    if (iid == IID_IUnknown || iid == IID_IDeckLinkVideoOutputCallback)
        *ppv = static_cast<IDeckLinkVideoOutputCallback*>(this);
    else if (iid == IID_IDeckLinkAudioOutputCallback)
        *ppv = static_cast<IDeckLinkAudioOutputCallback*>(this);

    if (*ppv == NULL)
        return E_NOINTERFACE;

    AddRef();
    return S_OK;
}
@@ -2694,3 +2966,8 @@ HRESULT PlayoutDelegate::ScheduledPlaybackHasStopped ()
{
return S_OK;
}
// Audio output callback entry point: forwards the request, including the preroll flag,
// to the owning OpenGLComposite and returns its result unchanged.
HRESULT PlayoutDelegate::RenderAudioSamples (BOOL preroll)
{
    const HRESULT ownerResult = m_pOwner->RenderAudioSamples(preroll);
    return ownerResult;
}