// Files
// 2026-05-04 14:32:29 +10:00
//
// 207 lines
// 6.8 KiB
// C++
#include "AudioSupport.h"
#include <algorithm>
#include <cmath>
#include <iterator>
#include <limits>
namespace
{
// Scale factor mapping signed 32-bit PCM onto roughly [-1, 1).
constexpr float kInt32ToFloat = 1.0f / 2147483648.0f;
// Number of recent mono samples retained for spectral analysis.
constexpr std::size_t kAnalysisWindowSamples = 1024;
// Cap on queued audio: ten seconds' worth of sample frames.
constexpr std::size_t kMaxBufferedAudioFrames = kAudioSampleRate * 10;

// Restrict value to the closed unit interval [0, 1].
float Clamp01(float value)
{
    const float upperBounded = std::min(1.0f, value);
    return std::max(0.0f, upperBounded);
}

// Convert one signed 32-bit PCM sample to a float clamped to [-1, 1].
float SampleToFloat(int32_t sample)
{
    const float scaled = static_cast<float>(sample) * kInt32ToFloat;
    const float upperBounded = std::min(1.0f, scaled);
    return std::max(-1.0f, upperBounded);
}

// Single-bin DFT magnitude (Goertzel recurrence) of the window at the
// given frequency in Hz, normalised by the window length.
float GoertzelMagnitude(const std::vector<float>& samples, float frequency)
{
    if (samples.empty())
        return 0.0f;
    constexpr double kPi = 3.14159265358979323846;
    const double omega = 2.0 * kPi * static_cast<double>(frequency) / static_cast<double>(kAudioSampleRate);
    const double coefficient = 2.0 * std::cos(omega);
    double delayedOne = 0.0;  // state from the previous iteration
    double delayedTwo = 0.0;  // state from two iterations back
    for (const float sample : samples)
    {
        const double current = coefficient * delayedOne - delayedTwo + static_cast<double>(sample);
        delayedTwo = delayedOne;
        delayedOne = current;
    }
    // Squared magnitude of the target bin; guard against tiny negative
    // rounding residue before the square root.
    const double power = delayedOne * delayedOne + delayedTwo * delayedTwo - coefficient * delayedOne * delayedTwo;
    return static_cast<float>(std::sqrt(std::max(0.0, power)) / static_cast<double>(samples.size()));
}
}
// Map a video frame index to its starting audio sample time, rounding
// to the nearest sample. Returns 0 when the timescale is 0 (avoids a
// division by zero).
uint64_t AudioSampleTimeForVideoFrame(uint64_t videoFrameIndex, uint64_t frameDuration, uint64_t frameTimescale, uint64_t audioSampleRate)
{
    if (frameTimescale == 0)
        return 0;
    // Frame position in timescale ticks, converted to samples with
    // round-half-up integer arithmetic.
    const uint64_t ticks = videoFrameIndex * frameDuration;
    const uint64_t halfScale = frameTimescale / 2;
    return (ticks * audioSampleRate + halfScale) / frameTimescale;
}
// Number of audio sample frames covered by one video frame: the gap
// between this frame's start sample and the next frame's start sample.
unsigned AudioSamplesForVideoFrame(uint64_t videoFrameIndex, uint64_t frameDuration, uint64_t frameTimescale, uint64_t audioSampleRate)
{
    const uint64_t begin = AudioSampleTimeForVideoFrame(videoFrameIndex, frameDuration, frameTimescale, audioSampleRate);
    const uint64_t finish = AudioSampleTimeForVideoFrame(videoFrameIndex + 1, frameDuration, frameTimescale, audioSampleRate);
    if (finish <= begin)
        return 0;
    return static_cast<unsigned>(finish - begin);
}
// Reinitialise the buffer with delaySampleFrames frames of silence so
// that subsequent pops produce the requested latency before real audio
// arrives. Also clears the underrun counter.
void AudioDelayBuffer::Reset(unsigned delaySampleFrames)
{
    std::lock_guard<std::mutex> lock(mMutex);
    mSamples.assign(static_cast<std::size_t>(delaySampleFrames) * kAudioChannelCount, 0);
    mUnderrunCount = 0;
}
// Append interleaved samples to the tail of the buffer, then trim the
// oldest samples so the buffer never exceeds kMaxBufferedAudioFrames.
// Null input or a zero frame count is a no-op.
void AudioDelayBuffer::PushInterleaved(const int32_t* samples, std::size_t sampleFrameCount)
{
    if (!samples || sampleFrameCount == 0)
        return;
    std::lock_guard<std::mutex> lock(mMutex);
    const std::size_t sampleCount = sampleFrameCount * kAudioChannelCount;
    // Bulk range insert instead of a per-sample push_back loop.
    mSamples.insert(mSamples.end(), samples, samples + sampleCount);
    const std::size_t maxSamples = kMaxBufferedAudioFrames * kAudioChannelCount;
    if (mSamples.size() > maxSamples)
    {
        // Drop the oldest overflow in one range erase rather than
        // popping the front one element at a time.
        const std::size_t excess = mSamples.size() - maxSamples;
        mSamples.erase(mSamples.begin(), std::next(mSamples.begin(), excess));
    }
}
// Remove up to sampleFrameCount frames from the head of the buffer.
// The returned block is always full-sized; when fewer samples are
// buffered the tail stays zero (silence) and underrun is set to true
// with the underrun counter incremented.
AudioFrameBlock AudioDelayBuffer::Pop(std::size_t sampleFrameCount, bool& underrun)
{
    AudioFrameBlock block;
    const std::size_t requestedSamples = sampleFrameCount * kAudioChannelCount;
    block.interleavedSamples.resize(requestedSamples, 0);
    std::lock_guard<std::mutex> lock(mMutex);
    underrun = mSamples.size() < requestedSamples;
    if (underrun)
        ++mUnderrunCount;
    const std::size_t available = std::min(requestedSamples, mSamples.size());
    // Copy the head of the queue out in one pass, then erase it.
    const auto copyEnd = std::next(mSamples.begin(), available);
    std::copy(mSamples.begin(), copyEnd, block.interleavedSamples.begin());
    mSamples.erase(mSamples.begin(), copyEnd);
    return block;
}
// Number of whole sample frames currently buffered.
unsigned AudioDelayBuffer::BufferedSampleFrames() const
{
    std::lock_guard<std::mutex> lock(mMutex);
    const std::size_t frames = mSamples.size() / kAudioChannelCount;
    return static_cast<unsigned>(frames);
}
// Total underruns recorded since the last Reset(). Snapshot is taken
// under the lock; the counter is advanced by Pop().
uint64_t AudioDelayBuffer::UnderrunCount() const
{
    const std::lock_guard<std::mutex> lock(mMutex);
    return mUnderrunCount;
}
// Return the analyzer to its initial state: empty snapshot, empty mono
// history, and band smoothing restarted from silence.
void AudioAnalyzer::Reset()
{
    mCurrent = AudioAnalysisSnapshot();
    mSmoothedBands = { 0.0f, 0.0f, 0.0f, 0.0f };
    mMonoHistory.clear();
}
// Analyze one block of interleaved audio: per-channel RMS/peak levels,
// four smoothed frequency-band magnitudes, and a packed two-row
// visualisation texture (row 0: waveform, row 1: spectrum). The result
// is cached in mCurrent and returned.
// NOTE(review): indexing at frame*2 / frame*2+1 assumes the block is
// stereo-interleaved with frameCount() full frames — confirm
// AudioFrameBlock's invariant.
AudioAnalysisSnapshot AudioAnalyzer::Analyze(const AudioFrameBlock& block)
{
AudioAnalysisSnapshot next;
// Running level accumulators, per channel (index 0 = left, 1 = right).
double sumSquares[2] = { 0.0, 0.0 };
float peak[2] = { 0.0f, 0.0f };
double monoSumSquares = 0.0;
float monoPeak = 0.0f;
const std::size_t frames = block.frameCount();
for (std::size_t frame = 0; frame < frames; ++frame)
{
const float left = SampleToFloat(block.interleavedSamples[frame * 2]);
const float right = SampleToFloat(block.interleavedSamples[frame * 2 + 1]);
// Mono mix-down feeds both the mono meters and the analysis history.
const float mono = (left + right) * 0.5f;
sumSquares[0] += static_cast<double>(left) * left;
sumSquares[1] += static_cast<double>(right) * right;
peak[0] = std::max(peak[0], std::abs(left));
peak[1] = std::max(peak[1], std::abs(right));
monoSumSquares += static_cast<double>(mono) * mono;
monoPeak = std::max(monoPeak, std::abs(mono));
// Keep only the newest kAnalysisWindowSamples mono samples.
mMonoHistory.push_back(mono);
while (mMonoHistory.size() > kAnalysisWindowSamples)
mMonoHistory.pop_front();
}
// With no frames the snapshot keeps its default (zero) levels.
if (frames > 0)
{
next.rms[0] = static_cast<float>(std::sqrt(sumSquares[0] / static_cast<double>(frames)));
next.rms[1] = static_cast<float>(std::sqrt(sumSquares[1] / static_cast<double>(frames)));
next.peak[0] = peak[0];
next.peak[1] = peak[1];
next.monoRms = static_cast<float>(std::sqrt(monoSumSquares / static_cast<double>(frames)));
next.monoPeak = monoPeak;
}
// Copy the history into contiguous storage for the Goertzel passes.
std::vector<float> window(mMonoHistory.begin(), mMonoHistory.end());
// Representative centre frequencies: bass, low-mid, mid, treble (Hz).
const float bandFrequencies[4] = { 90.0f, 300.0f, 1200.0f, 5000.0f };
for (std::size_t band = 0; band < next.bands.size(); ++band)
{
// The *8 gain is an empirical scale before clamping to [0, 1].
const float raw = Clamp01(GoertzelMagnitude(window, bandFrequencies[band]) * 8.0f);
// Asymmetric smoothing: fast attack (0.45), slow release (0.12).
const float smoothing = raw > mSmoothedBands[band] ? 0.45f : 0.12f;
mSmoothedBands[band] = mSmoothedBands[band] + (raw - mSmoothedBands[band]) * smoothing;
next.bands[band] = Clamp01(mSmoothedBands[band]);
}
// Fill the texture: pixels [0, width) hold the waveform row, pixels
// [width, 2*width) hold the spectrum row, 4 floats (RGBA) per pixel.
// NOTE(review): assumes next.texture holds at least
// 2 * kAudioTextureWidth * 4 floats and kAudioTextureWidth > 1 —
// confirm against AudioAnalysisSnapshot's declaration.
for (unsigned x = 0; x < kAudioTextureWidth; ++x)
{
float mono = 0.0f;
if (!mMonoHistory.empty())
{
// Resample the history across the texture width by nearest index.
const std::size_t historyIndex = static_cast<std::size_t>(
(static_cast<uint64_t>(x) * static_cast<uint64_t>(mMonoHistory.size())) / kAudioTextureWidth);
auto it = mMonoHistory.begin();
std::advance(it, std::min(historyIndex, mMonoHistory.size() - 1));
mono = *it;
}
const std::size_t waveformOffset = x * 4;
// Waveform row: sample remapped from [-1, 1] to [0, 1] in R and G,
// mono RMS in B, opaque alpha.
next.texture[waveformOffset + 0] = mono * 0.5f + 0.5f;
next.texture[waveformOffset + 1] = next.texture[waveformOffset + 0];
next.texture[waveformOffset + 2] = next.monoRms;
next.texture[waveformOffset + 3] = 1.0f;
// Spectrum row: linear interpolation between the two nearest bands.
const float bandPosition = static_cast<float>(x) / static_cast<float>(kAudioTextureWidth - 1);
const float scaled = bandPosition * static_cast<float>(next.bands.size() - 1);
const unsigned bandA = static_cast<unsigned>(std::floor(scaled));
const unsigned bandB = std::min<unsigned>(bandA + 1, static_cast<unsigned>(next.bands.size() - 1));
const float t = scaled - static_cast<float>(bandA);
const float spectrum = next.bands[bandA] * (1.0f - t) + next.bands[bandB] * t;
const std::size_t spectrumOffset = (kAudioTextureWidth + x) * 4;
// G/B/A carry the raw bass/low-mid/mid bands for shader use.
next.texture[spectrumOffset + 0] = spectrum;
next.texture[spectrumOffset + 1] = next.bands[0];
next.texture[spectrumOffset + 2] = next.bands[1];
next.texture[spectrumOffset + 3] = next.bands[2];
}
mCurrent = next;
return mCurrent;
}