diff --git a/CMakeLists.txt b/CMakeLists.txt
index 572b1c5..e2b2d08 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,6 +18,8 @@ if(NOT EXISTS "${GPUDIRECT_DIR}/lib/x64/dvp.lib")
 endif()

 set(APP_SOURCES
+    "${APP_DIR}/AudioSupport.cpp"
+    "${APP_DIR}/AudioSupport.h"
     "${APP_DIR}/ControlServer.cpp"
     "${APP_DIR}/ControlServer.h"
     "${APP_DIR}/DeckLinkAPI_i.c"
@@ -147,6 +149,21 @@ endif()

 add_test(NAME OscServerTests COMMAND OscServerTests)

+add_executable(AudioSupportTests
+    "${APP_DIR}/AudioSupport.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/tests/AudioSupportTests.cpp"
+)
+
+target_include_directories(AudioSupportTests PRIVATE
+    "${APP_DIR}"
+)
+
+if(MSVC)
+    target_compile_options(AudioSupportTests PRIVATE /W3)
+endif()
+
+add_test(NAME AudioSupportTests COMMAND AudioSupportTests)
+
 add_custom_command(TARGET LoopThroughWithOpenGLCompositing POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E copy_if_different
         "${GPUDIRECT_DIR}/bin/x64/dvp.dll"
diff --git a/README.md b/README.md
index 3a18a85..ad1e708 100644
--- a/README.md
+++ b/README.md
@@ -126,12 +126,18 @@ Current native test coverage includes:
   "outputFrameRate": "59.94",
   "autoReload": true,
   "maxTemporalHistoryFrames": 12,
+  "audioEnabled": true,
+  "audioChannelCount": 2,
+  "audioSampleRate": 48000,
+  "audioDelayMode": "matchVideoPreroll",
   "enableExternalKeying": true
 }
 ```

 `inputVideoFormat`/`inputFrameRate` select the DeckLink capture mode. `outputVideoFormat`/`outputFrameRate` select the playout mode. The shader stack runs at input resolution and the final rendered frame is scaled once into the configured output mode. Common examples include `720p`/`50`, `720p`/`59.94`, `1080i`/`50`, `1080i`/`59.94`, `1080p`/`25`, `1080p`/`50`, `1080p`/`59.94`, and `2160p`/`59.94`, depending on card support.

+`audioEnabled` enables embedded stereo 48 kHz PCM pass-through. Audio is delayed to match the scheduled video preroll and the synchronized level/spectrum data is exposed to shaders.
+
 Legacy `videoFormat` and `frameRate` keys are still accepted and apply to both input and output unless the explicit input/output keys are present.

 The control UI is available at:
diff --git a/SHADER_CONTRACT.md b/SHADER_CONTRACT.md
index fa1c460..2316ad3 100644
--- a/SHADER_CONTRACT.md
+++ b/SHADER_CONTRACT.md
@@ -125,6 +125,11 @@ struct ShaderContext
     float bypass;
     int sourceHistoryLength;
     int temporalHistoryLength;
+    float2 audioRms;
+    float2 audioPeak;
+    float audioMonoRms;
+    float audioMonoPeak;
+    float4 audioBands;
 };
 ```

@@ -140,6 +145,11 @@ Fields:
 - `bypass`: `1.0` when the layer is bypassed, otherwise `0.0`.
 - `sourceHistoryLength`: number of usable source-history frames currently available.
 - `temporalHistoryLength`: number of usable temporal frames currently available for this layer.
+- `audioRms`: left/right RMS level for the audio block synchronized with the rendered output frame.
+- `audioPeak`: left/right peak level for the same synchronized audio block.
+- `audioMonoRms`: mono RMS level derived from left/right.
+- `audioMonoPeak`: mono peak level derived from left/right.
+- `audioBands`: four smoothed, normalized low-to-high frequency bands.

 ## Helper Functions

@@ -149,6 +159,8 @@ The wrapper provides:
 float4 sampleVideo(float2 uv);
 float4 sampleSourceHistory(int framesAgo, float2 uv);
 float4 sampleTemporalHistory(int framesAgo, float2 uv);
+float4 sampleAudioWaveform(float x);
+float4 sampleAudioSpectrum(float x);
 ```

 `sampleVideo` samples the live decoded source video.
@@ -157,6 +169,10 @@ float4 sampleTemporalHistory(int framesAgo, float2 uv); `sampleTemporalHistory` samples previous pre-layer input frames for temporal shaders that request `preLayerInput` history. `framesAgo` is clamped into the available range. If no temporal history is available, it falls back to `sampleVideo`. +`sampleAudioWaveform` samples the current synchronized audio waveform texture. `x` is normalized `0..1`; returned waveform channels are encoded from `-1..1` into `0..1`. + +`sampleAudioSpectrum` samples the current synchronized audio spectrum texture. Values are normalized `0..1`. + Example: ```slang diff --git a/apps/LoopThroughWithOpenGLCompositing/AudioSupport.cpp b/apps/LoopThroughWithOpenGLCompositing/AudioSupport.cpp new file mode 100644 index 0000000..dce5a04 --- /dev/null +++ b/apps/LoopThroughWithOpenGLCompositing/AudioSupport.cpp @@ -0,0 +1,206 @@ +#include "AudioSupport.h" + +#include +#include +#include +#include + +namespace +{ +constexpr float kInt32ToFloat = 1.0f / 2147483648.0f; +constexpr std::size_t kAnalysisWindowSamples = 1024; +constexpr std::size_t kMaxBufferedAudioFrames = kAudioSampleRate * 10; + +float Clamp01(float value) +{ + return std::max(0.0f, std::min(1.0f, value)); +} + +float SampleToFloat(int32_t sample) +{ + return std::max(-1.0f, std::min(1.0f, static_cast(sample) * kInt32ToFloat)); +} + +float GoertzelMagnitude(const std::vector& samples, float frequency) +{ + if (samples.empty()) + return 0.0f; + + const double omega = 2.0 * 3.14159265358979323846 * static_cast(frequency) / static_cast(kAudioSampleRate); + const double coefficient = 2.0 * std::cos(omega); + double q0 = 0.0; + double q1 = 0.0; + double q2 = 0.0; + + for (float sample : samples) + { + q0 = coefficient * q1 - q2 + static_cast(sample); + q2 = q1; + q1 = q0; + } + + const double power = q1 * q1 + q2 * q2 - coefficient * q1 * q2; + return static_cast(std::sqrt(std::max(0.0, power)) / static_cast(samples.size())); +} +} + +uint64_t AudioSampleTimeForVideoFrame(uint64_t videoFrameIndex, uint64_t frameDuration, uint64_t frameTimescale, uint64_t audioSampleRate) +{ + if (frameTimescale == 0) + return 0; + + const uint64_t numerator = videoFrameIndex * frameDuration * audioSampleRate; + return (numerator + frameTimescale / 2) / frameTimescale; +} + +unsigned AudioSamplesForVideoFrame(uint64_t videoFrameIndex, uint64_t frameDuration, uint64_t frameTimescale, uint64_t audioSampleRate) +{ + const uint64_t start = AudioSampleTimeForVideoFrame(videoFrameIndex, frameDuration, frameTimescale, audioSampleRate); + const uint64_t end = AudioSampleTimeForVideoFrame(videoFrameIndex + 1, frameDuration, frameTimescale, audioSampleRate); + return static_cast(end > start ? 
end - start : 0); +} + +void AudioDelayBuffer::Reset(unsigned delaySampleFrames) +{ + std::lock_guard lock(mMutex); + mSamples.clear(); + mSamples.resize(static_cast(delaySampleFrames) * kAudioChannelCount, 0); + mUnderrunCount = 0; +} + +void AudioDelayBuffer::PushInterleaved(const int32_t* samples, std::size_t sampleFrameCount) +{ + if (!samples || sampleFrameCount == 0) + return; + + std::lock_guard lock(mMutex); + const std::size_t sampleCount = sampleFrameCount * kAudioChannelCount; + for (std::size_t index = 0; index < sampleCount; ++index) + mSamples.push_back(samples[index]); + + const std::size_t maxSamples = kMaxBufferedAudioFrames * kAudioChannelCount; + while (mSamples.size() > maxSamples) + mSamples.pop_front(); +} + +AudioFrameBlock AudioDelayBuffer::Pop(std::size_t sampleFrameCount, bool& underrun) +{ + AudioFrameBlock block; + block.interleavedSamples.resize(sampleFrameCount * kAudioChannelCount, 0); + + std::lock_guard lock(mMutex); + const std::size_t requestedSamples = sampleFrameCount * kAudioChannelCount; + underrun = mSamples.size() < requestedSamples; + if (underrun) + ++mUnderrunCount; + + const std::size_t availableSamples = std::min(requestedSamples, mSamples.size()); + for (std::size_t index = 0; index < availableSamples; ++index) + { + block.interleavedSamples[index] = mSamples.front(); + mSamples.pop_front(); + } + + return block; +} + +unsigned AudioDelayBuffer::BufferedSampleFrames() const +{ + std::lock_guard lock(mMutex); + return static_cast(mSamples.size() / kAudioChannelCount); +} + +uint64_t AudioDelayBuffer::UnderrunCount() const +{ + std::lock_guard lock(mMutex); + return mUnderrunCount; +} + +void AudioAnalyzer::Reset() +{ + mMonoHistory.clear(); + mSmoothedBands = { 0.0f, 0.0f, 0.0f, 0.0f }; + mCurrent = AudioAnalysisSnapshot(); +} + +AudioAnalysisSnapshot AudioAnalyzer::Analyze(const AudioFrameBlock& block) +{ + AudioAnalysisSnapshot next; + double sumSquares[2] = { 0.0, 0.0 }; + float peak[2] = { 0.0f, 0.0f }; + double monoSumSquares = 0.0; + float monoPeak = 0.0f; + const std::size_t frames = block.frameCount(); + + for (std::size_t frame = 0; frame < frames; ++frame) + { + const float left = SampleToFloat(block.interleavedSamples[frame * 2]); + const float right = SampleToFloat(block.interleavedSamples[frame * 2 + 1]); + const float mono = (left + right) * 0.5f; + + sumSquares[0] += static_cast(left) * left; + sumSquares[1] += static_cast(right) * right; + peak[0] = std::max(peak[0], std::abs(left)); + peak[1] = std::max(peak[1], std::abs(right)); + monoSumSquares += static_cast(mono) * mono; + monoPeak = std::max(monoPeak, std::abs(mono)); + + mMonoHistory.push_back(mono); + while (mMonoHistory.size() > kAnalysisWindowSamples) + mMonoHistory.pop_front(); + } + + if (frames > 0) + { + next.rms[0] = static_cast(std::sqrt(sumSquares[0] / static_cast(frames))); + next.rms[1] = static_cast(std::sqrt(sumSquares[1] / static_cast(frames))); + next.peak[0] = peak[0]; + next.peak[1] = peak[1]; + next.monoRms = static_cast(std::sqrt(monoSumSquares / static_cast(frames))); + next.monoPeak = monoPeak; + } + + std::vector window(mMonoHistory.begin(), mMonoHistory.end()); + const float bandFrequencies[4] = { 90.0f, 300.0f, 1200.0f, 5000.0f }; + for (std::size_t band = 0; band < next.bands.size(); ++band) + { + const float raw = Clamp01(GoertzelMagnitude(window, bandFrequencies[band]) * 8.0f); + const float smoothing = raw > mSmoothedBands[band] ? 
0.45f : 0.12f; + mSmoothedBands[band] = mSmoothedBands[band] + (raw - mSmoothedBands[band]) * smoothing; + next.bands[band] = Clamp01(mSmoothedBands[band]); + } + + for (unsigned x = 0; x < kAudioTextureWidth; ++x) + { + float mono = 0.0f; + if (!mMonoHistory.empty()) + { + const std::size_t historyIndex = static_cast( + (static_cast(x) * static_cast(mMonoHistory.size())) / kAudioTextureWidth); + auto it = mMonoHistory.begin(); + std::advance(it, std::min(historyIndex, mMonoHistory.size() - 1)); + mono = *it; + } + + const std::size_t waveformOffset = x * 4; + next.texture[waveformOffset + 0] = mono * 0.5f + 0.5f; + next.texture[waveformOffset + 1] = next.texture[waveformOffset + 0]; + next.texture[waveformOffset + 2] = next.monoRms; + next.texture[waveformOffset + 3] = 1.0f; + + const float bandPosition = static_cast(x) / static_cast(kAudioTextureWidth - 1); + const float scaled = bandPosition * static_cast(next.bands.size() - 1); + const unsigned bandA = static_cast(std::floor(scaled)); + const unsigned bandB = std::min(bandA + 1, static_cast(next.bands.size() - 1)); + const float t = scaled - static_cast(bandA); + const float spectrum = next.bands[bandA] * (1.0f - t) + next.bands[bandB] * t; + const std::size_t spectrumOffset = (kAudioTextureWidth + x) * 4; + next.texture[spectrumOffset + 0] = spectrum; + next.texture[spectrumOffset + 1] = next.bands[0]; + next.texture[spectrumOffset + 2] = next.bands[1]; + next.texture[spectrumOffset + 3] = next.bands[2]; + } + + mCurrent = next; + return mCurrent; +} diff --git a/apps/LoopThroughWithOpenGLCompositing/AudioSupport.h b/apps/LoopThroughWithOpenGLCompositing/AudioSupport.h new file mode 100644 index 0000000..c9201db --- /dev/null +++ b/apps/LoopThroughWithOpenGLCompositing/AudioSupport.h @@ -0,0 +1,71 @@ +#pragma once + +#include +#include +#include +#include +#include + +constexpr unsigned kAudioSampleRate = 48000; +constexpr unsigned kAudioChannelCount = 2; +constexpr unsigned kAudioTextureWidth = 64; +constexpr unsigned kAudioTextureHeight = 2; + +struct AudioFrameBlock +{ + std::vector interleavedSamples; + + std::size_t frameCount() const + { + return interleavedSamples.size() / kAudioChannelCount; + } +}; + +struct AudioAnalysisSnapshot +{ + std::array rms = { 0.0f, 0.0f }; + std::array peak = { 0.0f, 0.0f }; + float monoRms = 0.0f; + float monoPeak = 0.0f; + std::array bands = { 0.0f, 0.0f, 0.0f, 0.0f }; + std::array texture = {}; +}; + +struct AudioStatusSnapshot +{ + bool enabled = false; + unsigned bufferedSampleFrames = 0; + uint64_t underrunCount = 0; + AudioAnalysisSnapshot analysis; +}; + +class AudioDelayBuffer +{ +public: + void Reset(unsigned delaySampleFrames); + void PushInterleaved(const int32_t* samples, std::size_t sampleFrameCount); + AudioFrameBlock Pop(std::size_t sampleFrameCount, bool& underrun); + unsigned BufferedSampleFrames() const; + uint64_t UnderrunCount() const; + +private: + mutable std::mutex mMutex; + std::deque mSamples; + uint64_t mUnderrunCount = 0; +}; + +class AudioAnalyzer +{ +public: + void Reset(); + AudioAnalysisSnapshot Analyze(const AudioFrameBlock& block); + const AudioAnalysisSnapshot& Current() const { return mCurrent; } + +private: + std::deque mMonoHistory; + std::array mSmoothedBands = { 0.0f, 0.0f, 0.0f, 0.0f }; + AudioAnalysisSnapshot mCurrent; +}; + +uint64_t AudioSampleTimeForVideoFrame(uint64_t videoFrameIndex, uint64_t frameDuration, uint64_t frameTimescale, uint64_t audioSampleRate = kAudioSampleRate); +unsigned AudioSamplesForVideoFrame(uint64_t videoFrameIndex, uint64_t 
frameDuration, uint64_t frameTimescale, uint64_t audioSampleRate = kAudioSampleRate); diff --git a/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.cpp b/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.cpp index da032c3..43c3fb0 100644 --- a/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.cpp @@ -60,9 +60,16 @@ DEFINE_GUID(IID_PinnedMemoryAllocator, namespace { constexpr GLuint kDecodedVideoTextureUnit = 1; -constexpr GLuint kSourceHistoryTextureUnitBase = 2; +constexpr GLuint kAudioDataTextureUnit = 2; +constexpr GLuint kSourceHistoryTextureUnitBase = 3; constexpr GLuint kPackedVideoTextureUnit = 2; constexpr GLuint kGlobalParamsBindingPoint = 0; +constexpr unsigned kVideoPrerollFrameCount = 5; +constexpr unsigned kAudioOutputWaterLevelSampleFrames = kAudioSampleRate / 2; + +#ifndef GL_RGBA32F +#define GL_RGBA32F 0x8814 +#endif const char* kVertexShaderSource = "#version 430 core\n" "out vec2 vTexCoord;\n" @@ -315,8 +322,10 @@ void AppendStd140Vec4(std::vector& buffer, float x, float y, floa OpenGLComposite::OpenGLComposite(HWND hWnd, HDC hDC, HGLRC hRC) : hGLWnd(hWnd), hGLDC(hDC), hGLRC(hRC), mCaptureDelegate(NULL), mPlayoutDelegate(NULL), - mDLInput(NULL), mDLOutput(NULL), mDLKeyer(NULL), + mDLInput(NULL), mDLOutput(NULL), mDLInputConfiguration(NULL), mDLKeyer(NULL), mPlayoutAllocator(NULL), + mTotalPlayoutFrames(0), + mNextAudioSampleFrame(0), mInputFrameWidth(0), mInputFrameHeight(0), mOutputFrameWidth(0), mOutputFrameHeight(0), mInputDisplayModeName("1080p59.94"), @@ -332,6 +341,7 @@ OpenGLComposite::OpenGLComposite(HWND hWnd, HDC hDC, HGLRC hRC) : mLayerTempTexture(0), mFBOTexture(0), mOutputTexture(0), + mAudioDataTexture(0), mUnpinnedTextureBuffer(0), mDecodeFrameBuf(0), mLayerTempFrameBuf(0), @@ -347,6 +357,8 @@ OpenGLComposite::OpenGLComposite(HWND hWnd, HDC hDC, HGLRC hRC) : mGlobalParamsUBOSize(0), mViewWidth(0), mViewHeight(0), + mAudioEnabled(false), + mAudioPrerolling(false), mTemporalHistoryNeedsReset(true) { InitializeCriticalSection(&pMutex); @@ -362,6 +374,12 @@ OpenGLComposite::~OpenGLComposite() { mDLInput->SetCallback(NULL); + if (mDLInputConfiguration != NULL) + { + mDLInputConfiguration->Release(); + mDLInputConfiguration = NULL; + } + mDLInput->Release(); mDLInput = NULL; } @@ -394,6 +412,7 @@ OpenGLComposite::~OpenGLComposite() } mDLOutput->SetScheduledFrameCompletionCallback(NULL); + mDLOutput->SetAudioCallback(NULL); mDLOutput->Release(); mDLOutput = NULL; @@ -435,6 +454,8 @@ OpenGLComposite::~OpenGLComposite() glDeleteTextures(1, &mFBOTexture); if (mOutputTexture != 0) glDeleteTextures(1, &mOutputTexture); + if (mAudioDataTexture != 0) + glDeleteTextures(1, &mAudioDataTexture); if (mOutputFrameBuf != 0) glDeleteFramebuffers(1, &mOutputFrameBuf); if (mUnpinnedTextureBuffer != 0) @@ -667,6 +688,26 @@ bool OpenGLComposite::InitDeckLink() goto error; } + mAudioEnabled = mRuntimeHost ? 
mRuntimeHost->AudioEnabled() : true; + if (mAudioEnabled) + { + if (mDLInput->QueryInterface(IID_IDeckLinkConfiguration, (void**)&mDLInputConfiguration) == S_OK && mDLInputConfiguration != NULL) + { + if (mDLInputConfiguration->SetInt(bmdDeckLinkConfigAudioInputConnection, bmdAudioConnectionEmbedded) != S_OK) + OutputDebugStringA("Could not force DeckLink audio input connection to embedded; using current device setting.\n"); + } + else + { + OutputDebugStringA("Could not query DeckLink input configuration; using current audio input connection.\n"); + } + } + + if (mAudioEnabled && mDLInput->EnableAudioInput(bmdAudioSampleRate48kHz, bmdAudioSampleType32bitInteger, kAudioChannelCount) != S_OK) + { + OutputDebugStringA("Could not enable DeckLink audio input; continuing without audio.\n"); + mAudioEnabled = false; + } + mCaptureDelegate = new CaptureDelegate(this); if (mDLInput->SetCallback(mCaptureDelegate) != S_OK) goto error; @@ -680,6 +721,13 @@ bool OpenGLComposite::InitDeckLink() if (mDLOutput->EnableVideoOutput(outputDisplayMode, bmdVideoOutputFlagDefault) != S_OK) goto error; + if (mAudioEnabled && mDLOutput->EnableAudioOutput(bmdAudioSampleRate48kHz, bmdAudioSampleType32bitInteger, kAudioChannelCount, bmdAudioOutputStreamTimestamped) != S_OK) + { + OutputDebugStringA("Could not enable DeckLink audio output; continuing without audio.\n"); + mDLInput->DisableAudioInput(); + mAudioEnabled = false; + } + if (mDLOutput->QueryInterface(IID_IDeckLinkKeyer, (void**)&mDLKeyer) == S_OK && mDLKeyer != NULL) mDeckLinkKeyerInterfaceAvailable = true; @@ -748,6 +796,14 @@ bool OpenGLComposite::InitDeckLink() if (mDLOutput->SetScheduledFrameCompletionCallback(mPlayoutDelegate) != S_OK) goto error; + if (mAudioEnabled && mDLOutput->SetAudioCallback(mPlayoutDelegate) != S_OK) + { + OutputDebugStringA("Could not set DeckLink audio output callback; continuing without audio.\n"); + mDLInput->DisableAudioInput(); + mDLOutput->DisableAudioOutput(); + mAudioEnabled = false; + } + bSuccess = true; error: @@ -770,6 +826,11 @@ error: mDLOutput->Release(); mDLOutput = NULL; } + if (mDLInputConfiguration != NULL) + { + mDLInputConfiguration->Release(); + mDLInputConfiguration = NULL; + } } if (pDL != NULL) @@ -1052,6 +1113,14 @@ bool OpenGLComposite::InitOpenGLState() glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, mOutputFrameWidth, mOutputFrameHeight, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL); + glGenTextures(1, &mAudioDataTexture); + glBindTexture(GL_TEXTURE_2D, mAudioDataTexture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, kAudioTextureWidth, kAudioTextureHeight, 0, GL_RGBA, GL_FLOAT, mAudioAnalysis.texture.data()); + glBindFramebuffer(GL_FRAMEBUFFER, mOutputFrameBuf); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mOutputTexture, 0); glStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); @@ -1135,6 +1204,73 @@ void OpenGLComposite::VideoFrameArrived(IDeckLinkVideoInputFrame* inputFrame, bo inputFrameBuffer->Release(); } +void OpenGLComposite::AudioPacketArrived(IDeckLinkAudioInputPacket* audioPacket) +{ + if (!mAudioEnabled || !audioPacket) + return; + + void* audioBytes = nullptr; + if (audioPacket->GetBytes(&audioBytes) != S_OK || 
!audioBytes) + return; + + const long sampleFrameCount = audioPacket->GetSampleFrameCount(); + if (sampleFrameCount <= 0) + return; + + mAudioDelayBuffer.PushInterleaved(static_cast(audioBytes), static_cast(sampleFrameCount)); + updateAudioStatus(); +} + +HRESULT OpenGLComposite::RenderAudioSamples(BOOL preroll) +{ + if (!mAudioEnabled || !mDLOutput) + return S_OK; + + std::lock_guard audioLock(mAudioStateMutex); + + unsigned bufferedSampleFrames = 0; + if (mDLOutput->GetBufferedAudioSampleFrameCount(&bufferedSampleFrames) != S_OK) + { + OutputDebugStringA("Could not query DeckLink buffered audio sample count.\n"); + return E_FAIL; + } + + const unsigned delayedSampleFrames = delayedAudioSampleFrames(); + const unsigned waterLevel = kAudioOutputWaterLevelSampleFrames > delayedSampleFrames + ? kAudioOutputWaterLevelSampleFrames + : delayedSampleFrames; + if (bufferedSampleFrames >= waterLevel) + return S_OK; + + const unsigned requestedSampleFrames = waterLevel - bufferedSampleFrames; + bool underrun = false; + AudioFrameBlock audioBlock = mAudioDelayBuffer.Pop(requestedSampleFrames, underrun); + mAudioAnalysis = mAudioAnalyzer.Analyze(audioBlock); + + unsigned sampleFramesWritten = 0; + const unsigned sampleFrames = static_cast(audioBlock.frameCount()); + const HRESULT scheduleResult = mDLOutput->ScheduleAudioSamples( + audioBlock.interleavedSamples.data(), + sampleFrames, + static_cast(mNextAudioSampleFrame), + kAudioSampleRate, + &sampleFramesWritten); + + if (scheduleResult == S_OK) + { + if (sampleFramesWritten == 0 && sampleFrames > 0) + OutputDebugStringA("DeckLink accepted audio schedule call but wrote 0 sample frames.\n"); + mNextAudioSampleFrame += sampleFramesWritten; + } + else + { + OutputDebugStringA("DeckLink ScheduleAudioSamples failed while topping up audio output.\n"); + } + + updateAudioStatus(); + return scheduleResult; +} + // Render the live video texture through the runtime shader into the off-screen framebuffer. // Read the result back from the frame buffer and schedule it for playout. void OpenGLComposite::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, BMDOutputFrameCompletionResult completionResult) @@ -1149,6 +1285,16 @@ void OpenGLComposite::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, // make GL context current in this thread wglMakeCurrent( hGLDC, hGLRC ); + if (mAudioEnabled) + { + AudioAnalysisSnapshot audioAnalysis; + { + std::lock_guard audioLock(mAudioStateMutex); + audioAnalysis = mAudioAnalysis; + } + updateAudioDataTexture(audioAnalysis); + } + // Draw the effect output to the off-screen framebuffer. 
const auto renderStartTime = std::chrono::steady_clock::now(); if (mFastTransferExtensionAvailable) @@ -1231,9 +1377,25 @@ void OpenGLComposite::PlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, bool OpenGLComposite::Start() { mTotalPlayoutFrames = 0; + initializeAudioDelay(); + if (mAudioEnabled) + { + mDLOutput->FlushBufferedAudioSamples(); + if (mDLOutput->BeginAudioPreroll() != S_OK) + { + OutputDebugStringA("Could not begin DeckLink audio preroll; continuing without audio.\n"); + mDLInput->DisableAudioInput(); + mDLOutput->DisableAudioOutput(); + mAudioEnabled = false; + } + else + { + mAudioPrerolling = true; + } + } // Preroll frames - for (unsigned i = 0; i < 5; i++) + for (unsigned i = 0; i < kVideoPrerollFrameCount; i++) { // Take each video frame from the front of the queue and move it to the back IDeckLinkMutableVideoFrame* outputVideoFrame = mDLOutputVideoFrameQueue.front(); @@ -1264,8 +1426,26 @@ bool OpenGLComposite::Start() mTotalPlayoutFrames++; } - mDLInput->StartStreams(); - mDLOutput->StartScheduledPlayback(0, mFrameTimescale, 1.0); + if (mAudioEnabled) + RenderAudioSamples(TRUE); + + if (mAudioPrerolling) + { + if (mDLOutput->EndAudioPreroll() != S_OK) + { + OutputDebugStringA("Could not end DeckLink audio preroll; continuing without audio.\n"); + mDLInput->DisableAudioInput(); + mDLOutput->DisableAudioOutput(); + mAudioEnabled = false; + } + mAudioPrerolling = false; + } + + if (mDLInput->StartStreams() != S_OK) + return false; + + if (mDLOutput->StartScheduledPlayback(0, mFrameTimescale, 1.0) != S_OK) + return false; return true; } @@ -1297,9 +1477,16 @@ bool OpenGLComposite::Stop() mDLInput->StopStreams(); mDLInput->DisableVideoInput(); + if (mAudioEnabled) + mDLInput->DisableAudioInput(); mDLOutput->StopScheduledPlayback(0, NULL, 0); + mDLOutput->SetAudioCallback(NULL); + mDLOutput->SetScheduledFrameCompletionCallback(NULL); mDLOutput->DisableVideoOutput(); + mAudioPrerolling = false; + if (mAudioEnabled) + mDLOutput->DisableAudioOutput(); return true; } @@ -1411,6 +1598,9 @@ bool OpenGLComposite::compileSingleLayerProgram(const RuntimeRenderState& state, const GLint videoInputLocation = glGetUniformLocation(newProgram.get(), "gVideoInput"); if (videoInputLocation >= 0) glUniform1i(videoInputLocation, static_cast(kDecodedVideoTextureUnit)); + const GLint audioDataLocation = glGetUniformLocation(newProgram.get(), "gAudioData"); + if (audioDataLocation >= 0) + glUniform1i(audioDataLocation, static_cast(kAudioDataTextureUnit)); for (unsigned index = 0; index < historyCap; ++index) { const std::string sourceSamplerName = "gSourceHistory" + std::to_string(index); @@ -1973,6 +2163,8 @@ void OpenGLComposite::renderShaderProgram(GLuint sourceTexture, GLuint destinati glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glActiveTexture(GL_TEXTURE0 + kDecodedVideoTextureUnit); glBindTexture(GL_TEXTURE_2D, sourceTexture); + glActiveTexture(GL_TEXTURE0 + kAudioDataTextureUnit); + glBindTexture(GL_TEXTURE_2D, mAudioDataTexture); bindHistorySamplers(state, sourceTexture); bindLayerTextureAssets(layerProgram); glBindVertexArray(mFullscreenVAO); @@ -1995,6 +2187,8 @@ void OpenGLComposite::renderShaderProgram(GLuint sourceTexture, GLuint destinati glActiveTexture(GL_TEXTURE0 + shaderTextureBase + static_cast(index)); glBindTexture(GL_TEXTURE_2D, 0); } + glActiveTexture(GL_TEXTURE0 + kAudioDataTextureUnit); + glBindTexture(GL_TEXTURE_2D, 0); glActiveTexture(GL_TEXTURE0 + kDecodedVideoTextureUnit); glBindTexture(GL_TEXTURE_2D, 0); glActiveTexture(GL_TEXTURE0); @@ -2066,6 
+2260,55 @@ void OpenGLComposite::broadcastRuntimeState() mControlServer->BroadcastState(); } +unsigned OpenGLComposite::delayedAudioSampleFrames() const +{ + return static_cast(AudioSampleTimeForVideoFrame(kVideoPrerollFrameCount, mFrameDuration, mFrameTimescale)); +} + +void OpenGLComposite::initializeAudioDelay() +{ + std::lock_guard audioLock(mAudioStateMutex); + mAudioAnalyzer.Reset(); + mAudioAnalysis = AudioAnalysisSnapshot(); + mAudioDelayBuffer.Reset(delayedAudioSampleFrames()); + mNextAudioSampleFrame = 0; + updateAudioStatus(); +} + +AudioFrameBlock OpenGLComposite::popAudioForVideoFrame(uint64_t videoFrameIndex) +{ + const unsigned sampleFrames = AudioSamplesForVideoFrame(videoFrameIndex, mFrameDuration, mFrameTimescale); + bool underrun = false; + AudioFrameBlock block = mAudioDelayBuffer.Pop(sampleFrames, underrun); + mAudioAnalysis = mAudioAnalyzer.Analyze(block); + return block; +} + +void OpenGLComposite::updateAudioDataTexture(const AudioAnalysisSnapshot& analysis) +{ + if (mAudioDataTexture == 0) + return; + + glActiveTexture(GL_TEXTURE0 + kAudioDataTextureUnit); + glBindTexture(GL_TEXTURE_2D, mAudioDataTexture); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kAudioTextureWidth, kAudioTextureHeight, GL_RGBA, GL_FLOAT, analysis.texture.data()); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0); +} + +void OpenGLComposite::updateAudioStatus() +{ + if (!mRuntimeHost) + return; + + AudioStatusSnapshot status; + status.enabled = mAudioEnabled; + status.bufferedSampleFrames = mAudioDelayBuffer.BufferedSampleFrames(); + status.underrunCount = mAudioDelayBuffer.UnderrunCount(); + status.analysis = mAudioAnalysis; + mRuntimeHost->SetAudioStatus(status); +} + bool OpenGLComposite::updateGlobalParamsBuffer(const RuntimeRenderState& state, unsigned availableSourceHistoryLength, unsigned availableTemporalHistoryLength) { std::vector buffer; @@ -2085,6 +2328,15 @@ bool OpenGLComposite::updateGlobalParamsBuffer(const RuntimeRenderState& state, : 0u; AppendStd140Int(buffer, static_cast(effectiveSourceHistoryLength)); AppendStd140Int(buffer, static_cast(effectiveTemporalHistoryLength)); + AppendStd140Vec2(buffer, state.audioAnalysis.rms[0], state.audioAnalysis.rms[1]); + AppendStd140Vec2(buffer, state.audioAnalysis.peak[0], state.audioAnalysis.peak[1]); + AppendStd140Float(buffer, state.audioAnalysis.monoRms); + AppendStd140Float(buffer, state.audioAnalysis.monoPeak); + AppendStd140Vec4(buffer, + state.audioAnalysis.bands[0], + state.audioAnalysis.bands[1], + state.audioAnalysis.bands[2], + state.audioAnalysis.bands[3]); for (const ShaderParameterDefinition& definition : state.parameterDefinitions) { @@ -2623,11 +2875,14 @@ ULONG CaptureDelegate::Release() return newCount; } -HRESULT CaptureDelegate::VideoInputFrameArrived(IDeckLinkVideoInputFrame* inputFrame, IDeckLinkAudioInputPacket* /*audioPacket*/) +HRESULT CaptureDelegate::VideoInputFrameArrived(IDeckLinkVideoInputFrame* inputFrame, IDeckLinkAudioInputPacket* audioPacket) { + if (audioPacket) + m_pOwner->AudioPacketArrived(audioPacket); + if (! inputFrame) { - // It's possible to receive a NULL inputFrame, but a valid audioPacket. Ignore audio-only frame. + // It's possible to receive a NULL inputFrame, but a valid audioPacket. 
return S_OK; } @@ -2653,6 +2908,23 @@ PlayoutDelegate::PlayoutDelegate(OpenGLComposite* pOwner) : HRESULT PlayoutDelegate::QueryInterface(REFIID iid, LPVOID *ppv) { + if (ppv == nullptr) + return E_POINTER; + + if (iid == IID_IUnknown || iid == IID_IDeckLinkVideoOutputCallback) + { + *ppv = static_cast(this); + AddRef(); + return S_OK; + } + + if (iid == IID_IDeckLinkAudioOutputCallback) + { + *ppv = static_cast(this); + AddRef(); + return S_OK; + } + *ppv = NULL; return E_NOINTERFACE; } @@ -2694,3 +2966,8 @@ HRESULT PlayoutDelegate::ScheduledPlaybackHasStopped () { return S_OK; } + +HRESULT PlayoutDelegate::RenderAudioSamples (BOOL preroll) +{ + return m_pOwner->RenderAudioSamples(preroll); +} diff --git a/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.h b/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.h index c5f74ed..335bee2 100644 --- a/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.h +++ b/apps/LoopThroughWithOpenGLCompositing/OpenGLComposite.h @@ -52,13 +52,16 @@ #include #include "DeckLinkAPI_h.h" +#include "AudioSupport.h" #include "VideoFrameTransfer.h" #include "RuntimeHost.h" #include +#include #include #include #include +#include #include #include @@ -96,6 +99,8 @@ public: void paintGL(); void VideoFrameArrived(IDeckLinkVideoInputFrame* inputFrame, bool hasNoInputSource); + void AudioPacketArrived(IDeckLinkAudioInputPacket* audioPacket); + HRESULT RenderAudioSamples(BOOL preroll); void PlayoutFrameCompleted(IDeckLinkVideoFrame* completedFrame, BMDOutputFrameCompletionResult result); private: @@ -112,12 +117,14 @@ private: // DeckLink IDeckLinkInput* mDLInput; IDeckLinkOutput* mDLOutput; + IDeckLinkConfiguration* mDLInputConfiguration; IDeckLinkKeyer* mDLKeyer; std::deque mDLOutputVideoFrameQueue; PinnedMemoryAllocator* mPlayoutAllocator; BMDTimeValue mFrameDuration; BMDTimeScale mFrameTimescale; unsigned mTotalPlayoutFrames; + uint64_t mNextAudioSampleFrame; unsigned mInputFrameWidth; unsigned mInputFrameHeight; unsigned mOutputFrameWidth; @@ -139,6 +146,7 @@ private: GLuint mLayerTempTexture; GLuint mFBOTexture; GLuint mOutputTexture; + GLuint mAudioDataTexture; GLuint mUnpinnedTextureBuffer; GLuint mDecodeFrameBuf; GLuint mLayerTempFrameBuf; @@ -157,6 +165,12 @@ private: std::unique_ptr mRuntimeHost; std::unique_ptr mControlServer; std::unique_ptr mOscServer; + bool mAudioEnabled; + bool mAudioPrerolling; + std::mutex mAudioStateMutex; + AudioDelayBuffer mAudioDelayBuffer; + AudioAnalyzer mAudioAnalyzer; + AudioAnalysisSnapshot mAudioAnalysis; struct LayerProgram { @@ -209,6 +223,11 @@ private: void renderEffect(); bool PollRuntimeChanges(); void broadcastRuntimeState(); + void initializeAudioDelay(); + unsigned delayedAudioSampleFrames() const; + AudioFrameBlock popAudioForVideoFrame(uint64_t videoFrameIndex); + void updateAudioDataTexture(const AudioAnalysisSnapshot& analysis); + void updateAudioStatus(); bool updateGlobalParamsBuffer(const RuntimeRenderState& state, unsigned availableSourceHistoryLength, unsigned availableTemporalHistoryLength); bool validateTemporalTextureUnitBudget(const std::vector& layerStates, std::string& error) const; bool ensureTemporalHistoryResources(const std::vector& layerStates, std::string& error); @@ -341,7 +360,7 @@ public: // Render Delegate Class //////////////////////////////////////////// -class PlayoutDelegate : public IDeckLinkVideoOutputCallback +class PlayoutDelegate : public IDeckLinkVideoOutputCallback, public IDeckLinkAudioOutputCallback { OpenGLComposite* m_pOwner; LONG mRefCount; @@ -356,6 +375,7 @@ public: 
virtual HRESULT STDMETHODCALLTYPE ScheduledFrameCompleted (IDeckLinkVideoFrame* completedFrame, BMDOutputFrameCompletionResult result); virtual HRESULT STDMETHODCALLTYPE ScheduledPlaybackHasStopped (); + virtual HRESULT STDMETHODCALLTYPE RenderAudioSamples (BOOL preroll); }; #endif // __OPENGL_COMPOSITE_H__ diff --git a/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.cpp b/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.cpp index 8e49e77..d8db93b 100644 --- a/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.cpp +++ b/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.cpp @@ -1055,6 +1055,12 @@ void RuntimeHost::SetPerformanceStats(double frameBudgetMilliseconds, double ren mSmoothedRenderMilliseconds = mSmoothedRenderMilliseconds * 0.9 + mRenderMilliseconds * 0.1; } +void RuntimeHost::SetAudioStatus(const AudioStatusSnapshot& status) +{ + std::lock_guard lock(mMutex); + mAudioStatus = status; +} + void RuntimeHost::AdvanceFrame() { std::lock_guard lock(mMutex); @@ -1121,6 +1127,7 @@ std::vector RuntimeHost::GetLayerRenderStates(unsigned outpu state.inputHeight = mSignalHeight; state.outputWidth = outputWidth; state.outputHeight = outputHeight; + state.audioAnalysis = mAudioStatus.analysis; state.parameterDefinitions = shaderIt->second.parameters; state.textureAssets = shaderIt->second.textureAssets; state.isTemporal = shaderIt->second.temporal.enabled; @@ -1182,6 +1189,21 @@ bool RuntimeHost::LoadConfig(std::string& error) } if (const JsonValue* enableExternalKeyingValue = configJson.find("enableExternalKeying")) mConfig.enableExternalKeying = enableExternalKeyingValue->asBoolean(mConfig.enableExternalKeying); + if (const JsonValue* audioEnabledValue = configJson.find("audioEnabled")) + mConfig.audioEnabled = audioEnabledValue->asBoolean(mConfig.audioEnabled); + if (const JsonValue* audioChannelCountValue = configJson.find("audioChannelCount")) + mConfig.audioChannelCount = static_cast(audioChannelCountValue->asNumber(static_cast(mConfig.audioChannelCount))); + if (const JsonValue* audioSampleRateValue = configJson.find("audioSampleRate")) + mConfig.audioSampleRate = static_cast(audioSampleRateValue->asNumber(static_cast(mConfig.audioSampleRate))); + if (const JsonValue* audioDelayModeValue = configJson.find("audioDelayMode")) + { + if (audioDelayModeValue->isString() && !audioDelayModeValue->asString().empty()) + mConfig.audioDelayMode = audioDelayModeValue->asString(); + } + if (mConfig.audioChannelCount != kAudioChannelCount) + mConfig.audioChannelCount = kAudioChannelCount; + if (mConfig.audioSampleRate != kAudioSampleRate) + mConfig.audioSampleRate = kAudioSampleRate; if (const JsonValue* videoFormatValue = configJson.find("videoFormat")) { if (videoFormatValue->isString() && !videoFormatValue->asString().empty()) @@ -1519,6 +1541,10 @@ JsonValue RuntimeHost::BuildStateValue() const app.set("autoReload", JsonValue(mAutoReloadEnabled)); app.set("maxTemporalHistoryFrames", JsonValue(static_cast(mConfig.maxTemporalHistoryFrames))); app.set("enableExternalKeying", JsonValue(mConfig.enableExternalKeying)); + app.set("audioEnabled", JsonValue(mConfig.audioEnabled)); + app.set("audioChannelCount", JsonValue(static_cast(mConfig.audioChannelCount))); + app.set("audioSampleRate", JsonValue(static_cast(mConfig.audioSampleRate))); + app.set("audioDelayMode", JsonValue(mConfig.audioDelayMode)); app.set("inputVideoFormat", JsonValue(mConfig.inputVideoFormat)); app.set("inputFrameRate", JsonValue(mConfig.inputFrameRate)); app.set("outputVideoFormat", JsonValue(mConfig.outputVideoFormat)); @@ 
-1538,6 +1564,26 @@ JsonValue RuntimeHost::BuildStateValue() const video.set("modeName", JsonValue(mSignalModeName)); root.set("video", video); + JsonValue audio = JsonValue::MakeObject(); + audio.set("enabled", JsonValue(mAudioStatus.enabled)); + audio.set("bufferedSampleFrames", JsonValue(static_cast(mAudioStatus.bufferedSampleFrames))); + audio.set("underrunCount", JsonValue(static_cast(mAudioStatus.underrunCount))); + JsonValue rms = JsonValue::MakeArray(); + rms.pushBack(JsonValue(static_cast(mAudioStatus.analysis.rms[0]))); + rms.pushBack(JsonValue(static_cast(mAudioStatus.analysis.rms[1]))); + audio.set("rms", rms); + JsonValue peak = JsonValue::MakeArray(); + peak.pushBack(JsonValue(static_cast(mAudioStatus.analysis.peak[0]))); + peak.pushBack(JsonValue(static_cast(mAudioStatus.analysis.peak[1]))); + audio.set("peak", peak); + audio.set("monoRms", JsonValue(static_cast(mAudioStatus.analysis.monoRms))); + audio.set("monoPeak", JsonValue(static_cast(mAudioStatus.analysis.monoPeak))); + JsonValue bands = JsonValue::MakeArray(); + for (float band : mAudioStatus.analysis.bands) + bands.pushBack(JsonValue(static_cast(band))); + audio.set("bands", bands); + root.set("audio", audio); + JsonValue deckLink = JsonValue::MakeObject(); deckLink.set("modelName", JsonValue(mDeckLinkOutputStatus.modelName)); deckLink.set("supportsInternalKeying", JsonValue(mDeckLinkOutputStatus.supportsInternalKeying)); diff --git a/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.h b/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.h index 016533b..8b53c39 100644 --- a/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.h +++ b/apps/LoopThroughWithOpenGLCompositing/RuntimeHost.h @@ -38,6 +38,7 @@ public: void SetDeckLinkOutputStatus(const std::string& modelName, bool supportsInternalKeying, bool supportsExternalKeying, bool keyerInterfaceAvailable, bool externalKeyingRequested, bool externalKeyingActive, const std::string& statusMessage); void SetPerformanceStats(double frameBudgetMilliseconds, double renderMilliseconds); + void SetAudioStatus(const AudioStatusSnapshot& status); void AdvanceFrame(); bool BuildLayerFragmentShaderSource(const std::string& layerId, std::string& fragmentShaderSource, std::string& error); @@ -52,6 +53,9 @@ public: unsigned short GetOscPort() const { return mConfig.oscPort; } unsigned GetMaxTemporalHistoryFrames() const { return mConfig.maxTemporalHistoryFrames; } bool ExternalKeyingEnabled() const { return mConfig.enableExternalKeying; } + bool AudioEnabled() const { return mConfig.audioEnabled; } + unsigned AudioChannelCount() const { return mConfig.audioChannelCount; } + unsigned AudioSampleRate() const { return mConfig.audioSampleRate; } const std::string& GetInputVideoFormat() const { return mConfig.inputVideoFormat; } const std::string& GetInputFrameRate() const { return mConfig.inputFrameRate; } const std::string& GetOutputVideoFormat() const { return mConfig.outputVideoFormat; } @@ -68,6 +72,10 @@ private: bool autoReload = true; unsigned maxTemporalHistoryFrames = 4; bool enableExternalKeying = false; + bool audioEnabled = true; + unsigned audioChannelCount = kAudioChannelCount; + unsigned audioSampleRate = kAudioSampleRate; + std::string audioDelayMode = "matchVideoPreroll"; std::string inputVideoFormat = "1080p"; std::string inputFrameRate = "59.94"; std::string outputVideoFormat = "1080p"; @@ -148,6 +156,7 @@ private: double mRenderMilliseconds; double mSmoothedRenderMilliseconds; DeckLinkOutputStatus mDeckLinkOutputStatus; + AudioStatusSnapshot mAudioStatus; unsigned short 
mServerPort; bool mAutoReloadEnabled; std::chrono::steady_clock::time_point mStartTime; diff --git a/apps/LoopThroughWithOpenGLCompositing/ShaderTypes.h b/apps/LoopThroughWithOpenGLCompositing/ShaderTypes.h index 19fa473..96f3e90 100644 --- a/apps/LoopThroughWithOpenGLCompositing/ShaderTypes.h +++ b/apps/LoopThroughWithOpenGLCompositing/ShaderTypes.h @@ -5,6 +5,8 @@ #include #include +#include "AudioSupport.h" + enum class ShaderParameterType { Float, @@ -95,6 +97,7 @@ struct RuntimeRenderState unsigned inputHeight = 0; unsigned outputWidth = 0; unsigned outputHeight = 0; + AudioAnalysisSnapshot audioAnalysis; bool isTemporal = false; TemporalHistorySource temporalHistorySource = TemporalHistorySource::None; unsigned requestedTemporalHistoryLength = 0; diff --git a/config/runtime-host.json b/config/runtime-host.json index 5e3a55e..f5bf579 100644 --- a/config/runtime-host.json +++ b/config/runtime-host.json @@ -8,5 +8,9 @@ "outputFrameRate": "59.94", "autoReload": true, "maxTemporalHistoryFrames": 12, + "audioEnabled": true, + "audioChannelCount": 2, + "audioSampleRate": 48000, + "audioDelayMode": "matchVideoPreroll", "enableExternalKeying": true } diff --git a/runtime/templates/shader_wrapper.slang.in b/runtime/templates/shader_wrapper.slang.in index c6a6557..58c5ef4 100644 --- a/runtime/templates/shader_wrapper.slang.in +++ b/runtime/templates/shader_wrapper.slang.in @@ -16,6 +16,11 @@ struct ShaderContext float bypass; int sourceHistoryLength; int temporalHistoryLength; + float2 audioRms; + float2 audioPeak; + float audioMonoRms; + float audioMonoPeak; + float4 audioBands; }; cbuffer GlobalParams @@ -28,15 +33,31 @@ cbuffer GlobalParams float gBypass; int gSourceHistoryLength; int gTemporalHistoryLength; + float2 gAudioRms; + float2 gAudioPeak; + float gAudioMonoRms; + float gAudioMonoPeak; + float4 gAudioBands; {{PARAMETER_UNIFORMS}}}; Sampler2D gVideoInput; +Sampler2D gAudioData; {{SOURCE_HISTORY_SAMPLERS}}{{TEMPORAL_HISTORY_SAMPLERS}}{{TEXTURE_SAMPLERS}} float4 sampleVideo(float2 tc) { return gVideoInput.Sample(tc); } +float4 sampleAudioWaveform(float x) +{ + return gAudioData.Sample(float2(saturate(x), 0.25)); +} + +float4 sampleAudioSpectrum(float x) +{ + return gAudioData.Sample(float2(saturate(x), 0.75)); +} + float4 sampleSourceHistory(int framesAgo, float2 tc) { if (gSourceHistoryLength <= 0) @@ -83,6 +104,11 @@ float4 fragmentMain(FragmentInput input) : SV_Target context.bypass = gBypass; context.sourceHistoryLength = gSourceHistoryLength; context.temporalHistoryLength = gTemporalHistoryLength; + context.audioRms = gAudioRms; + context.audioPeak = gAudioPeak; + context.audioMonoRms = gAudioMonoRms; + context.audioMonoPeak = gAudioMonoPeak; + context.audioBands = gAudioBands; float4 effectedColor = {{ENTRY_POINT_CALL}}; float mixValue = clamp(gBypass > 0.5 ? 
0.0 : gMixAmount, 0.0, 1.0); return lerp(context.sourceColor, effectedColor, mixValue); diff --git a/shaders/audio-vu-meter/shader.json b/shaders/audio-vu-meter/shader.json new file mode 100644 index 0000000..3442625 --- /dev/null +++ b/shaders/audio-vu-meter/shader.json @@ -0,0 +1,76 @@ +{ + "id": "audio-vu-meter", + "name": "Audio VU Meter", + "description": "Draws stereo audio level meters from the runtime audio analysis data.", + "category": "Utility", + "entryPoint": "shadeVideo", + "parameters": [ + { + "id": "meterPosition", + "label": "Position", + "type": "vec2", + "default": [0.08, 0.82], + "min": [0.0, 0.0], + "max": [1.0, 1.0], + "step": [0.01, 0.01] + }, + { + "id": "meterScale", + "label": "Scale", + "type": "float", + "default": 0.35, + "min": 0.1, + "max": 1.0, + "step": 0.01 + }, + { + "id": "meterOpacity", + "label": "Opacity", + "type": "float", + "default": 0.9, + "min": 0.0, + "max": 1.0, + "step": 0.01 + }, + { + "id": "noiseGate", + "label": "Noise Gate", + "type": "float", + "default": 0.03, + "min": 0.0, + "max": 0.5, + "step": 0.01 + }, + { + "id": "meterColor", + "label": "Meter Color", + "type": "color", + "default": [0.2, 1.0, 0.55, 1.0] + }, + { + "id": "peakColor", + "label": "Peak Color", + "type": "color", + "default": [1.0, 0.85, 0.2, 1.0] + }, + { + "id": "backgroundOpacity", + "label": "Background", + "type": "float", + "default": 0.45, + "min": 0.0, + "max": 1.0, + "step": 0.01 + }, + { + "id": "orientation", + "label": "Orientation", + "type": "enum", + "default": "horizontal", + "options": [ + { "value": "horizontal", "label": "Horizontal" }, + { "value": "vertical", "label": "Vertical" } + ] + } + ] +} diff --git a/shaders/audio-vu-meter/shader.slang b/shaders/audio-vu-meter/shader.slang new file mode 100644 index 0000000..fd2285a --- /dev/null +++ b/shaders/audio-vu-meter/shader.slang @@ -0,0 +1,59 @@ +float rectMask(float2 uv, float2 minUv, float2 maxUv) +{ + float2 insideMin = step(minUv, uv); + float2 insideMax = step(uv, maxUv); + return insideMin.x * insideMin.y * insideMax.x * insideMax.y; +} + +float denoiseLevel(float value) +{ + float gate = saturate(noiseGate); + float clean = saturate((value - gate) / max(1.0 - gate, 0.001)); + return smoothstep(0.0, 1.0, clean); +} + +float4 shadeVideo(ShaderContext context) +{ + float4 color = context.sourceColor; + float2 size = orientation == 0 ? 
float2(meterScale, meterScale * 0.18) : float2(meterScale * 0.18, meterScale); + float2 minUv = clamp(meterPosition, 0.0, 1.0 - size); + float2 local = (context.uv - minUv) / max(size, float2(0.001)); + float inside = rectMask(local, float2(0.0), float2(1.0)); + if (inside <= 0.0) + return color; + + float3 bg = lerp(color.rgb, float3(0.0), saturate(backgroundOpacity)); + float leftLevel = denoiseLevel(context.audioRms.x * 2.4); + float rightLevel = denoiseLevel(context.audioRms.y * 2.4); + float leftPeak = denoiseLevel(context.audioPeak.x); + float rightPeak = denoiseLevel(context.audioPeak.y); + + float bar = 0.0; + float peak = 0.0; + if (orientation == 0) + { + float leftRow = rectMask(local, float2(0.04, 0.58), float2(0.96, 0.86)); + float rightRow = rectMask(local, float2(0.04, 0.14), float2(0.96, 0.42)); + float leftFill = rectMask(local, float2(0.04, 0.58), float2(0.04 + 0.92 * leftLevel, 0.86)); + float rightFill = rectMask(local, float2(0.04, 0.14), float2(0.04 + 0.92 * rightLevel, 0.42)); + float leftPeakLine = rectMask(local, float2(0.04 + 0.92 * leftPeak - 0.006, 0.55), float2(0.04 + 0.92 * leftPeak + 0.006, 0.89)); + float rightPeakLine = rectMask(local, float2(0.04 + 0.92 * rightPeak - 0.006, 0.11), float2(0.04 + 0.92 * rightPeak + 0.006, 0.45)); + bar = max(leftFill, rightFill); + peak = max(leftPeakLine * leftRow, rightPeakLine * rightRow); + } + else + { + float leftColumn = rectMask(local, float2(0.14, 0.04), float2(0.42, 0.96)); + float rightColumn = rectMask(local, float2(0.58, 0.04), float2(0.86, 0.96)); + float leftFill = rectMask(local, float2(0.14, 0.04), float2(0.42, 0.04 + 0.92 * leftLevel)); + float rightFill = rectMask(local, float2(0.58, 0.04), float2(0.86, 0.04 + 0.92 * rightLevel)); + float leftPeakLine = rectMask(local, float2(0.11, 0.04 + 0.92 * leftPeak - 0.006), float2(0.45, 0.04 + 0.92 * leftPeak + 0.006)); + float rightPeakLine = rectMask(local, float2(0.55, 0.04 + 0.92 * rightPeak - 0.006), float2(0.89, 0.04 + 0.92 * rightPeak + 0.006)); + bar = max(leftFill * leftColumn, rightFill * rightColumn); + peak = max(leftPeakLine, rightPeakLine); + } + + float3 metered = lerp(bg, meterColor.rgb, bar * saturate(meterOpacity) * meterColor.a); + metered = lerp(metered, peakColor.rgb, peak * saturate(meterOpacity) * peakColor.a); + return float4(metered, color.a); +} diff --git a/tests/AudioSupportTests.cpp b/tests/AudioSupportTests.cpp new file mode 100644 index 0000000..f4f25b0 --- /dev/null +++ b/tests/AudioSupportTests.cpp @@ -0,0 +1,115 @@ +#include "AudioSupport.h" + +#include +#include +#include +#include +#include + +namespace +{ +int gFailures = 0; + +void Expect(bool condition, const char* message) +{ + if (condition) + return; + + std::cerr << "FAIL: " << message << "\n"; + ++gFailures; +} + +int32_t ToSample(float value) +{ + const double clamped = std::max(-1.0, std::min(1.0, static_cast(value))); + return static_cast(clamped * 2147483647.0); +} + +void TestFrameSampleCounts() +{ + Expect(AudioSamplesForVideoFrame(0, 1, 50) == 960, "50 fps first frame has 960 audio samples"); + Expect(AudioSamplesForVideoFrame(0, 1, 60) == 800, "60 fps first frame has 800 audio samples"); + + uint64_t total = 0; + for (uint64_t frame = 0; frame < 600; ++frame) + total += AudioSamplesForVideoFrame(frame, 1001, 60000); + Expect(total == AudioSampleTimeForVideoFrame(600, 1001, 60000), "59.94 fps sample counts do not drift"); +} + +void TestDelayBuffer() +{ + AudioDelayBuffer buffer; + buffer.Reset(4); + std::vector input = { + 11, 12, + 21, 22, + 31, 32, + 41, 42 + }; 
+ buffer.PushInterleaved(input.data(), 4); + + bool underrun = false; + AudioFrameBlock first = buffer.Pop(4, underrun); + Expect(!underrun, "delay-buffer initial silence does not underrun"); + Expect(first.frameCount() == 4, "delay-buffer returns requested frame count"); + Expect(first.interleavedSamples[0] == 0 && first.interleavedSamples[7] == 0, "delay-buffer emits initial silence"); + + AudioFrameBlock second = buffer.Pop(4, underrun); + Expect(!underrun, "delay-buffer emits delayed input without underrun"); + Expect(second.interleavedSamples == input, "delay-buffer preserves delayed interleaved samples"); + + AudioFrameBlock third = buffer.Pop(2, underrun); + Expect(underrun, "delay-buffer reports underrun"); + Expect(third.interleavedSamples[0] == 0 && third.interleavedSamples[3] == 0, "delay-buffer underrun fills silence"); +} + +void TestAnalyzerSilence() +{ + AudioAnalyzer analyzer; + AudioFrameBlock block; + block.interleavedSamples.resize(512 * kAudioChannelCount, 0); + + AudioAnalysisSnapshot analysis = analyzer.Analyze(block); + Expect(analysis.rms[0] == 0.0f && analysis.rms[1] == 0.0f, "silence rms is zero"); + Expect(analysis.peak[0] == 0.0f && analysis.peak[1] == 0.0f, "silence peak is zero"); + Expect(analysis.bands[0] == 0.0f && analysis.bands[3] == 0.0f, "silence bands are zero"); +} + +void TestAnalyzerSineAndStereo() +{ + AudioAnalyzer analyzer; + AudioFrameBlock block; + block.interleavedSamples.resize(1024 * kAudioChannelCount, 0); + + for (std::size_t frame = 0; frame < 1024; ++frame) + { + const float phase = static_cast(frame) * 2.0f * 3.14159265f * 300.0f / static_cast(kAudioSampleRate); + block.interleavedSamples[frame * 2] = ToSample(std::sin(phase) * 0.8f); + block.interleavedSamples[frame * 2 + 1] = ToSample(0.25f); + } + + AudioAnalysisSnapshot analysis = analyzer.Analyze(block); + Expect(analysis.peak[0] > 0.75f && analysis.peak[0] <= 0.81f, "left sine peak is detected"); + Expect(analysis.rms[0] > 0.45f && analysis.rms[0] < 0.65f, "left sine rms is detected"); + Expect(analysis.peak[1] > 0.24f && analysis.peak[1] < 0.26f, "right constant peak remains independent"); + Expect(analysis.rms[1] > 0.24f && analysis.rms[1] < 0.26f, "right constant rms remains independent"); + Expect(analysis.bands[1] >= analysis.bands[0], "300 Hz sine activates lower-mid band"); +} +} + +int main() +{ + TestFrameSampleCounts(); + TestDelayBuffer(); + TestAnalyzerSilence(); + TestAnalyzerSineAndStereo(); + + if (gFailures != 0) + { + std::cerr << gFailures << " AudioSupport test failure(s).\n"; + return 1; + } + + std::cout << "AudioSupport tests passed.\n"; + return 0; +} diff --git a/ui/src/App.jsx b/ui/src/App.jsx index 1940394..f6a3776 100644 --- a/ui/src/App.jsx +++ b/ui/src/App.jsx @@ -19,6 +19,7 @@ function App() { const performance = appState?.performance ?? {}; const runtime = appState?.runtime ?? {}; const video = appState?.video ?? {}; + const audio = appState?.audio ?? {}; const app = appState?.app ?? {}; const stackPresets = appState?.stackPresets ?? []; @@ -67,7 +68,7 @@ function App() {
[JSX lost to markup stripping: the one-line change in this hunk passes the new `audio` object into the status-panel component.]
@@ -36,6 +36,21 @@ export function StatusPanels({ app, performance, runtime, video }) {
[JSX lost to markup stripping: this hunk (and the file header above it) adds fifteen lines rendering a new "Audio" status panel ahead of the existing "Compiler" panel, which still shows {runtime.compileMessage || "No compiler output."}.]
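
As a usage sketch for the shader-contract additions above (not part of this diff; the `shadeVideo` entry-point name simply mirrors the bundled audio-vu-meter shader), a minimal Slang layer can react to the synchronized audio data using only the documented `ShaderContext` fields and helpers:

```slang
// Illustrative only: pulse brightness with the synchronized mono RMS level,
// warm the image when the lowest smoothed band is active, and add a faint
// per-column lift from the spectrum texture.
float4 shadeVideo(ShaderContext context)
{
    float4 color = context.sourceColor;

    // context.audioMonoRms is the mono RMS of the audio block for this output frame.
    color.rgb *= 1.0 + context.audioMonoRms * 0.4;

    // context.audioBands.x is the lowest of the four smoothed, normalized bands.
    color.rgb = lerp(color.rgb, color.rgb * float3(1.15, 1.0, 0.85), saturate(context.audioBands.x));

    // sampleAudioSpectrum(x) returns normalized 0..1 spectrum values across the band range.
    color.rgb += sampleAudioSpectrum(context.uv.x).x * 0.1;

    return saturate(color);
}
```

The wrapper still blends this result with the source color through the layer's mix amount, so the effect fades out cleanly when the layer mix is lowered or the layer is bypassed.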