#include "AudioSupport.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

namespace {

// Scale factor mapping a full-scale signed 32-bit PCM sample into [-1, 1).
constexpr float kInt32ToFloat = 1.0f / 2147483648.0f;

// Number of recent mono samples retained for spectral / waveform analysis.
constexpr std::size_t kAnalysisWindowSamples = 1024;

// Hard cap (10 seconds of audio) so a stalled consumer cannot grow the
// delay buffer without bound.
constexpr std::size_t kMaxBufferedAudioFrames = kAudioSampleRate * 10;

float Clamp01(float value) {
    return std::max(0.0f, std::min(1.0f, value));
}

// Convert a signed 32-bit sample to a float clamped to [-1, 1].
float SampleToFloat(int32_t sample) {
    return std::max(-1.0f, std::min(1.0f, static_cast<float>(sample) * kInt32ToFloat));
}

// Single-bin DFT magnitude at `frequency` using the Goertzel algorithm,
// normalized by the window length. Returns 0 for an empty window.
float GoertzelMagnitude(const std::vector<float>& samples, float frequency) {
    if (samples.empty())
        return 0.0f;
    const double omega = 2.0 * 3.14159265358979323846 *
                         static_cast<double>(frequency) /
                         static_cast<double>(kAudioSampleRate);
    const double coefficient = 2.0 * std::cos(omega);
    double q0 = 0.0;
    double q1 = 0.0;
    double q2 = 0.0;
    for (float sample : samples) {
        q0 = coefficient * q1 - q2 + static_cast<double>(sample);
        q2 = q1;
        q1 = q0;
    }
    const double power = q1 * q1 + q2 * q2 - coefficient * q1 * q2;
    return static_cast<float>(std::sqrt(std::max(0.0, power)) /
                              static_cast<double>(samples.size()));
}

}  // namespace

// Map a video frame index to the audio sample clock, rounding to nearest.
// Returns 0 when the timescale is 0 (avoids division by zero).
uint64_t AudioSampleTimeForVideoFrame(uint64_t videoFrameIndex, uint64_t frameDuration,
                                      uint64_t frameTimescale, uint64_t audioSampleRate) {
    if (frameTimescale == 0)
        return 0;
    const uint64_t numerator = videoFrameIndex * frameDuration * audioSampleRate;
    return (numerator + frameTimescale / 2) / frameTimescale;
}

// Number of audio sample frames covered by one video frame. Computed as the
// difference of rounded frame-start times so per-frame counts sum exactly to
// the total stream length (no cumulative rounding drift).
unsigned AudioSamplesForVideoFrame(uint64_t videoFrameIndex, uint64_t frameDuration,
                                   uint64_t frameTimescale, uint64_t audioSampleRate) {
    const uint64_t start = AudioSampleTimeForVideoFrame(videoFrameIndex, frameDuration,
                                                        frameTimescale, audioSampleRate);
    const uint64_t end = AudioSampleTimeForVideoFrame(videoFrameIndex + 1, frameDuration,
                                                      frameTimescale, audioSampleRate);
    return static_cast<unsigned>(end > start ? end - start : 0);
}

// --- AudioDelayBuffer --------------------------------------------------------

// Clear buffered audio and pre-fill with `delaySampleFrames` frames of
// silence so the first Pop() output is delayed by the requested amount.
void AudioDelayBuffer::Reset(unsigned delaySampleFrames) {
    std::lock_guard<std::mutex> lock(mMutex);
    mSamples.clear();
    mSamples.resize(static_cast<std::size_t>(delaySampleFrames) * kAudioChannelCount, 0);
    mUnderrunCount = 0;
}

// Append interleaved sample frames; oldest audio is discarded once the
// buffer exceeds the 10-second cap.
void AudioDelayBuffer::PushInterleaved(const int32_t* samples, std::size_t sampleFrameCount) {
    if (!samples || sampleFrameCount == 0)
        return;
    std::lock_guard<std::mutex> lock(mMutex);
    const std::size_t sampleCount = sampleFrameCount * kAudioChannelCount;
    mSamples.insert(mSamples.end(), samples, samples + sampleCount);
    const std::size_t maxSamples = kMaxBufferedAudioFrames * kAudioChannelCount;
    if (mSamples.size() > maxSamples)
        mSamples.erase(mSamples.begin(), mSamples.begin() + (mSamples.size() - maxSamples));
}

// Remove up to `sampleFrameCount` frames from the front of the buffer.
// The returned block is always the full requested size, zero-padded when
// insufficient audio is buffered; `underrun` reports that condition.
AudioFrameBlock AudioDelayBuffer::Pop(std::size_t sampleFrameCount, bool& underrun) {
    AudioFrameBlock block;
    block.interleavedSamples.resize(sampleFrameCount * kAudioChannelCount, 0);
    std::lock_guard<std::mutex> lock(mMutex);
    const std::size_t requestedSamples = sampleFrameCount * kAudioChannelCount;
    underrun = mSamples.size() < requestedSamples;
    if (underrun)
        ++mUnderrunCount;
    const std::size_t availableSamples = std::min(requestedSamples, mSamples.size());
    std::copy(mSamples.begin(), mSamples.begin() + availableSamples,
              block.interleavedSamples.begin());
    mSamples.erase(mSamples.begin(), mSamples.begin() + availableSamples);
    return block;
}

unsigned AudioDelayBuffer::BufferedSampleFrames() const {
    std::lock_guard<std::mutex> lock(mMutex);
    return static_cast<unsigned>(mSamples.size() / kAudioChannelCount);
}

uint64_t AudioDelayBuffer::UnderrunCount() const {
    std::lock_guard<std::mutex> lock(mMutex);
    return mUnderrunCount;
}

// --- AudioAnalyzer -----------------------------------------------------------

void AudioAnalyzer::Reset() {
    mMonoHistory.clear();
    mSmoothedBands = { 0.0f, 0.0f, 0.0f, 0.0f };
    mCurrent = AudioAnalysisSnapshot();
}

// Analyze one block of interleaved stereo audio: per-channel and mono
// RMS/peak, four smoothed frequency bands (Goertzel), and a 2-row analysis
// texture (row 0: waveform, row 1: interpolated spectrum + raw bands).
// NOTE(review): indexing `frame * 2` assumes kAudioChannelCount == 2.
AudioAnalysisSnapshot AudioAnalyzer::Analyze(const AudioFrameBlock& block) {
    AudioAnalysisSnapshot next;
    double sumSquares[2] = { 0.0, 0.0 };
    float peak[2] = { 0.0f, 0.0f };
    double monoSumSquares = 0.0;
    float monoPeak = 0.0f;
    const std::size_t frames = block.frameCount();
    for (std::size_t frame = 0; frame < frames; ++frame) {
        const float left = SampleToFloat(block.interleavedSamples[frame * 2]);
        const float right = SampleToFloat(block.interleavedSamples[frame * 2 + 1]);
        const float mono = (left + right) * 0.5f;
        sumSquares[0] += static_cast<double>(left) * left;
        sumSquares[1] += static_cast<double>(right) * right;
        peak[0] = std::max(peak[0], std::abs(left));
        peak[1] = std::max(peak[1], std::abs(right));
        monoSumSquares += static_cast<double>(mono) * mono;
        monoPeak = std::max(monoPeak, std::abs(mono));
        mMonoHistory.push_back(mono);
        while (mMonoHistory.size() > kAnalysisWindowSamples)
            mMonoHistory.pop_front();
    }
    if (frames > 0) {
        next.rms[0] = static_cast<float>(std::sqrt(sumSquares[0] / static_cast<double>(frames)));
        next.rms[1] = static_cast<float>(std::sqrt(sumSquares[1] / static_cast<double>(frames)));
        next.peak[0] = peak[0];
        next.peak[1] = peak[1];
        next.monoRms = static_cast<float>(std::sqrt(monoSumSquares / static_cast<double>(frames)));
        next.monoPeak = monoPeak;
    }
    // Snapshot the rolling mono window for per-band Goertzel analysis.
    std::vector<float> window(mMonoHistory.begin(), mMonoHistory.end());
    const float bandFrequencies[4] = { 90.0f, 300.0f, 1200.0f, 5000.0f };
    for (std::size_t band = 0; band < next.bands.size(); ++band) {
        const float raw = Clamp01(GoertzelMagnitude(window, bandFrequencies[band]) * 8.0f);
        // Asymmetric smoothing: fast attack (0.45) on rising energy, slow
        // release (0.12) on falling energy.
        const float smoothing = raw > mSmoothedBands[band] ? 0.45f : 0.12f;
        mSmoothedBands[band] = mSmoothedBands[band] + (raw - mSmoothedBands[band]) * smoothing;
        next.bands[band] = Clamp01(mSmoothedBands[band]);
    }
    for (unsigned x = 0; x < kAudioTextureWidth; ++x) {
        // Row 0: waveform — nearest-sample resample of the mono history.
        float mono = 0.0f;
        if (!mMonoHistory.empty()) {
            const std::size_t historyIndex = static_cast<std::size_t>(
                (static_cast<std::size_t>(x) * static_cast<std::size_t>(mMonoHistory.size())) /
                kAudioTextureWidth);
            auto it = mMonoHistory.begin();
            std::advance(it, std::min(historyIndex, mMonoHistory.size() - 1));
            mono = *it;
        }
        const std::size_t waveformOffset = x * 4;
        next.texture[waveformOffset + 0] = mono * 0.5f + 0.5f;  // bias [-1,1] -> [0,1]
        next.texture[waveformOffset + 1] = next.texture[waveformOffset + 0];
        next.texture[waveformOffset + 2] = next.monoRms;
        next.texture[waveformOffset + 3] = 1.0f;
        // Row 1: linearly interpolated band spectrum plus raw band values.
        const float bandPosition = static_cast<float>(x) / static_cast<float>(kAudioTextureWidth - 1);
        const float scaled = bandPosition * static_cast<float>(next.bands.size() - 1);
        const unsigned bandA = static_cast<unsigned>(std::floor(scaled));
        const unsigned bandB = std::min(bandA + 1, static_cast<unsigned>(next.bands.size() - 1));
        const float t = scaled - static_cast<float>(bandA);
        const float spectrum = next.bands[bandA] * (1.0f - t) + next.bands[bandB] * t;
        const std::size_t spectrumOffset = (kAudioTextureWidth + x) * 4;
        next.texture[spectrumOffset + 0] = spectrum;
        next.texture[spectrumOffset + 1] = next.bands[0];
        next.texture[spectrumOffset + 2] = next.bands[1];
        next.texture[spectrumOffset + 3] = next.bands[2];
    }
    mCurrent = next;
    return mCurrent;
}