// Audio delay buffering and analysis support (implementation).
#include "AudioSupport.h"

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iterator>
#include <limits>
namespace
|
|
{
|
|
constexpr float kInt32ToFloat = 1.0f / 2147483648.0f;
|
|
constexpr std::size_t kAnalysisWindowSamples = 1024;
|
|
constexpr std::size_t kMaxBufferedAudioFrames = kAudioSampleRate * 10;
|
|
|
|
float Clamp01(float value)
|
|
{
|
|
return std::max(0.0f, std::min(1.0f, value));
|
|
}
|
|
|
|
float SampleToFloat(int32_t sample)
|
|
{
|
|
return std::max(-1.0f, std::min(1.0f, static_cast<float>(sample) * kInt32ToFloat));
|
|
}
|
|
|
|
float GoertzelMagnitude(const std::vector<float>& samples, float frequency)
|
|
{
|
|
if (samples.empty())
|
|
return 0.0f;
|
|
|
|
const double omega = 2.0 * 3.14159265358979323846 * static_cast<double>(frequency) / static_cast<double>(kAudioSampleRate);
|
|
const double coefficient = 2.0 * std::cos(omega);
|
|
double q0 = 0.0;
|
|
double q1 = 0.0;
|
|
double q2 = 0.0;
|
|
|
|
for (float sample : samples)
|
|
{
|
|
q0 = coefficient * q1 - q2 + static_cast<double>(sample);
|
|
q2 = q1;
|
|
q1 = q0;
|
|
}
|
|
|
|
const double power = q1 * q1 + q2 * q2 - coefficient * q1 * q2;
|
|
return static_cast<float>(std::sqrt(std::max(0.0, power)) / static_cast<double>(samples.size()));
|
|
}
|
|
}
|
|
|
|
// Maps a video frame index to the audio sample clock, rounding to the
// nearest sample. A zero timescale yields 0 rather than dividing by zero.
uint64_t AudioSampleTimeForVideoFrame(uint64_t videoFrameIndex, uint64_t frameDuration, uint64_t frameTimescale, uint64_t audioSampleRate)
{
    if (frameTimescale == 0)
        return 0;

    // Round-to-nearest: add half a timescale tick before the division.
    const uint64_t ticks = videoFrameIndex * frameDuration;
    const uint64_t scaled = ticks * audioSampleRate + frameTimescale / 2;
    return scaled / frameTimescale;
}

// Number of audio sample frames covered by one video frame: the difference
// between the start times of this frame and the next.
unsigned AudioSamplesForVideoFrame(uint64_t videoFrameIndex, uint64_t frameDuration, uint64_t frameTimescale, uint64_t audioSampleRate)
{
    const uint64_t begin = AudioSampleTimeForVideoFrame(videoFrameIndex, frameDuration, frameTimescale, audioSampleRate);
    const uint64_t finish = AudioSampleTimeForVideoFrame(videoFrameIndex + 1, frameDuration, frameTimescale, audioSampleRate);
    if (finish <= begin)
        return 0;
    return static_cast<unsigned>(finish - begin);
}
void AudioDelayBuffer::Reset(unsigned delaySampleFrames)
|
|
{
|
|
std::lock_guard<std::mutex> lock(mMutex);
|
|
mSamples.clear();
|
|
mSamples.resize(static_cast<std::size_t>(delaySampleFrames) * kAudioChannelCount, 0);
|
|
mUnderrunCount = 0;
|
|
}
|
|
|
|
void AudioDelayBuffer::PushInterleaved(const int32_t* samples, std::size_t sampleFrameCount)
|
|
{
|
|
if (!samples || sampleFrameCount == 0)
|
|
return;
|
|
|
|
std::lock_guard<std::mutex> lock(mMutex);
|
|
const std::size_t sampleCount = sampleFrameCount * kAudioChannelCount;
|
|
for (std::size_t index = 0; index < sampleCount; ++index)
|
|
mSamples.push_back(samples[index]);
|
|
|
|
const std::size_t maxSamples = kMaxBufferedAudioFrames * kAudioChannelCount;
|
|
while (mSamples.size() > maxSamples)
|
|
mSamples.pop_front();
|
|
}
|
|
|
|
// Removes up to `sampleFrameCount` frames from the head of the queue and
// returns them as a block. If fewer samples are buffered than requested,
// `underrun` is set, the underrun counter is bumped, and the shortfall is
// zero-padded so the returned block always has the requested size.
AudioFrameBlock AudioDelayBuffer::Pop(std::size_t sampleFrameCount, bool& underrun)
{
    AudioFrameBlock block;
    const std::size_t requestedSamples = sampleFrameCount * kAudioChannelCount;
    // Zero-fill up front; any shortfall below stays silent.
    block.interleavedSamples.resize(requestedSamples, 0);

    std::lock_guard<std::mutex> lock(mMutex);
    underrun = mSamples.size() < requestedSamples;
    if (underrun)
        ++mUnderrunCount;

    // Bulk copy + single range erase instead of a per-sample front/pop loop.
    const std::size_t availableSamples = std::min(requestedSamples, mSamples.size());
    const auto first = mSamples.begin();
    const auto last = std::next(first, static_cast<std::ptrdiff_t>(availableSamples));
    std::copy(first, last, block.interleavedSamples.begin());
    mSamples.erase(first, last);

    return block;
}
unsigned AudioDelayBuffer::BufferedSampleFrames() const
|
|
{
|
|
std::lock_guard<std::mutex> lock(mMutex);
|
|
return static_cast<unsigned>(mSamples.size() / kAudioChannelCount);
|
|
}
|
|
|
|
uint64_t AudioDelayBuffer::UnderrunCount() const
|
|
{
|
|
std::lock_guard<std::mutex> lock(mMutex);
|
|
return mUnderrunCount;
|
|
}
|
|
|
|
void AudioAnalyzer::Reset()
|
|
{
|
|
mMonoHistory.clear();
|
|
mSmoothedBands = { 0.0f, 0.0f, 0.0f, 0.0f };
|
|
mCurrent = AudioAnalysisSnapshot();
|
|
}
|
|
|
|
// Computes per-block loudness metrics (RMS/peak per channel and mono),
// smoothed band magnitudes via Goertzel, and fills the analysis texture,
// then caches and returns the resulting snapshot.
AudioAnalysisSnapshot AudioAnalyzer::Analyze(const AudioFrameBlock& block)
{
    AudioAnalysisSnapshot next;
    // Running accumulators for this block: [0] = left, [1] = right.
    double sumSquares[2] = { 0.0, 0.0 };
    float peak[2] = { 0.0f, 0.0f };
    double monoSumSquares = 0.0;
    float monoPeak = 0.0f;
    const std::size_t frames = block.frameCount();

    for (std::size_t frame = 0; frame < frames; ++frame)
    {
        // NOTE(review): indexing hard-codes two samples per frame — assumes
        // interleaved stereo; confirm kAudioChannelCount == 2.
        const float left = SampleToFloat(block.interleavedSamples[frame * 2]);
        const float right = SampleToFloat(block.interleavedSamples[frame * 2 + 1]);
        const float mono = (left + right) * 0.5f;

        sumSquares[0] += static_cast<double>(left) * left;
        sumSquares[1] += static_cast<double>(right) * right;
        peak[0] = std::max(peak[0], std::abs(left));
        peak[1] = std::max(peak[1], std::abs(right));
        monoSumSquares += static_cast<double>(mono) * mono;
        monoPeak = std::max(monoPeak, std::abs(mono));

        // Maintain a sliding window of the newest kAnalysisWindowSamples
        // mono samples; it feeds the waveform and Goertzel passes below.
        mMonoHistory.push_back(mono);
        while (mMonoHistory.size() > kAnalysisWindowSamples)
            mMonoHistory.pop_front();
    }

    // An empty block leaves all metrics at their zero defaults.
    if (frames > 0)
    {
        next.rms[0] = static_cast<float>(std::sqrt(sumSquares[0] / static_cast<double>(frames)));
        next.rms[1] = static_cast<float>(std::sqrt(sumSquares[1] / static_cast<double>(frames)));
        next.peak[0] = peak[0];
        next.peak[1] = peak[1];
        next.monoRms = static_cast<float>(std::sqrt(monoSumSquares / static_cast<double>(frames)));
        next.monoPeak = monoPeak;
    }

    // Snapshot the history into contiguous storage for the Goertzel pass.
    std::vector<float> window(mMonoHistory.begin(), mMonoHistory.end());
    // Band centers (Hz): bass, low-mid, mid, treble.
    const float bandFrequencies[4] = { 90.0f, 300.0f, 1200.0f, 5000.0f };
    for (std::size_t band = 0; band < next.bands.size(); ++band)
    {
        // 8.0f is an empirical gain applied before clamping to [0, 1].
        const float raw = Clamp01(GoertzelMagnitude(window, bandFrequencies[band]) * 8.0f);
        // Asymmetric one-pole smoothing: fast attack (0.45), slow release (0.12).
        const float smoothing = raw > mSmoothedBands[band] ? 0.45f : 0.12f;
        mSmoothedBands[band] = mSmoothedBands[band] + (raw - mSmoothedBands[band]) * smoothing;
        next.bands[band] = Clamp01(mSmoothedBands[band]);
    }

    // Texture layout (per the offsets below): two rows of kAudioTextureWidth
    // RGBA texels — row 0 waveform, row 1 spectrum.
    for (unsigned x = 0; x < kAudioTextureWidth; ++x)
    {
        float mono = 0.0f;
        if (!mMonoHistory.empty())
        {
            // Nearest-sample resampling of the mono history onto the
            // texture width.
            const std::size_t historyIndex = static_cast<std::size_t>(
                (static_cast<uint64_t>(x) * static_cast<uint64_t>(mMonoHistory.size())) / kAudioTextureWidth);
            auto it = mMonoHistory.begin();
            std::advance(it, std::min(historyIndex, mMonoHistory.size() - 1));
            mono = *it;
        }

        // Waveform texel: sample remapped from [-1, 1] to [0, 1] in R (and
        // duplicated in G), mono RMS in B, opaque alpha.
        const std::size_t waveformOffset = x * 4;
        next.texture[waveformOffset + 0] = mono * 0.5f + 0.5f;
        next.texture[waveformOffset + 1] = next.texture[waveformOffset + 0];
        next.texture[waveformOffset + 2] = next.monoRms;
        next.texture[waveformOffset + 3] = 1.0f;

        // Linearly interpolate between adjacent band magnitudes across x.
        // NOTE(review): assumes kAudioTextureWidth > 1 (divides by width - 1).
        const float bandPosition = static_cast<float>(x) / static_cast<float>(kAudioTextureWidth - 1);
        const float scaled = bandPosition * static_cast<float>(next.bands.size() - 1);
        const unsigned bandA = static_cast<unsigned>(std::floor(scaled));
        const unsigned bandB = std::min<unsigned>(bandA + 1, static_cast<unsigned>(next.bands.size() - 1));
        const float t = scaled - static_cast<float>(bandA);
        const float spectrum = next.bands[bandA] * (1.0f - t) + next.bands[bandB] * t;
        // Spectrum texel (second row): interpolated value in R, the first
        // three raw band levels in G/B/A.
        const std::size_t spectrumOffset = (kAudioTextureWidth + x) * 4;
        next.texture[spectrumOffset + 0] = spectrum;
        next.texture[spectrumOffset + 1] = next.bands[0];
        next.texture[spectrumOffset + 2] = next.bands[1];
        next.texture[spectrumOffset + 3] = next.bands[2];
    }

    mCurrent = next;
    return mCurrent;
}