Files
projectm/src/libprojectM/Audio/WaveformAligner.cpp
2024-02-06 07:58:32 -06:00

233 lines
8.2 KiB
C++

#include "WaveformAligner.hpp"
#include <algorithm>
#include <cmath>
#include <iterator>
namespace libprojectM {
namespace Audio {
/**
 * Sets up the per-octave bookkeeping used by the alignment search.
 *
 * The number of octaves (mip levels) is floor(log2(AudioBufferSamples - WaveformSamples)),
 * capped at 10. Octave 0 covers the full audio buffer; each following octave has half the
 * samples and half the search range (sample spacing) of the previous one.
 */
WaveformAligner::WaveformAligner()
{
    static const uint32_t maxOctaves{10};

    // Number of spare samples available for shifting the waveform. Treated as zero if the
    // waveform is not shorter than the audio buffer.
    const uint32_t alignmentMargin = AudioBufferSamples > WaveformSamples
                                         ? static_cast<uint32_t>(AudioBufferSamples - WaveformSamples)
                                         : 0;

    // Integer floor(log2(alignmentMargin)).
    // For AudioBufferSamples = 576 and WaveformSamples = 480:
    // floor(log2(96)) = 6
    // Computed with integer arithmetic so that exact powers of two are not undercounted by
    // floating-point rounding, and so that a margin of 0 or 1 simply yields zero octaves.
    uint32_t numOctaves{0};
    for (uint32_t remaining = alignmentMargin; remaining > 1; remaining /= 2)
    {
        numOctaves++;
    }

    m_octaves = std::min(numOctaves, maxOctaves);

    m_aligmentWeights.resize(m_octaves);
    m_firstNonzeroWeights.resize(m_octaves);
    m_lastNonzeroWeights.resize(m_octaves);
    m_octaveSamples.resize(m_octaves);
    m_octaveSampleSpacing.resize(m_octaves);
    m_oldWaveformMips.resize(m_octaves);

    if (m_octaves == 0)
    {
        // No room for alignment: all per-octave tables are empty, so there is no
        // element 0 to initialize.
        return;
    }

    m_octaveSamples[0] = AudioBufferSamples;
    m_octaveSampleSpacing[0] = AudioBufferSamples - WaveformSamples;

    // Every further octave halves both the sample count and the offset search range.
    for (uint32_t octave = 1; octave < m_octaves; octave++)
    {
        m_octaveSamples[octave] = m_octaveSamples[octave - 1] / 2;
        m_octaveSampleSpacing[octave] = m_octaveSampleSpacing[octave - 1] / 2;
    }
}
void WaveformAligner::ResampleOctaves(std::vector<WaveformBuffer>& dstWaveformMips, WaveformBuffer& newWaveform)
{
// Octave 0 is a direct copy of the new waveform
std::copy(newWaveform.begin(), newWaveform.end(), dstWaveformMips[0].begin());
// Calculate mip levels
// This downsamples the previous octave's waveform by a factor of 2
for (uint32_t octave = 1; octave < m_octaves; octave++)
{
for (uint32_t sample = 0; sample < m_octaveSamples[octave]; sample++)
{
dstWaveformMips[octave][sample] = 0.5f * (dstWaveformMips[octave - 1][sample * 2] + dstWaveformMips[octave - 1][sample * 2 + 1]);
}
}
}
/**
 * Computes the per-octave alignment weights and the index range of the non-zero weights.
 *
 * For each octave the weights form a clamped, steepened pyramid over the compared samples.
 * The first and last non-zero indices are stored so the error summation can skip the
 * zero-weighted edges.
 */
void WaveformAligner::GenerateWeights()
{
    // The below is performed only on the first fill.
    for (uint32_t octave = 0; octave < m_octaves; octave++)
    {
        // For example:
        //  m_octaveSampleSpacing[octave] == 4
        //  m_octaveSamples[octave] == 36
        //  (so we test 32 samples, w/4 offsets)
        uint32_t const compareSamples = m_octaveSamples[octave] - m_octaveSampleSpacing[octave];
        auto& weights = m_aligmentWeights[octave];

        for (uint32_t sample = 0; sample < compareSamples; sample++)
        {
            // Take a reference to the alignment weight and set it with the computation below.
            auto& weightRef = weights[sample];

            // Start with pyramid-shaped PDF, from 0..1..0
            if (sample < compareSamples / 2)
            {
                weightRef = static_cast<float>(sample * 2) / static_cast<float>(compareSamples);
            }
            else
            {
                weightRef = static_cast<float>((compareSamples - 1 - sample) * 2) / static_cast<float>(compareSamples);
            }

            /*
             * TWEAK how much the center matters, vs. the edges:
             *
             *   weight[i] = 5.0*((2*i/compareSamples) - 0.8) + 0.8
             *
             * Solving for weight[i] == 0 we get:
             *
             *   2*i/compareSamples = -0.8/5 + 0.8
             *   i = 0.32*compareSamples
             *
             * The weight distribution is symmetric so the falling side gives:
             *
             *   i = 0.68*compareSamples
             */
            weightRef = (weightRef - 0.8f) * 5.0f + 0.8f;

            // Clamp
            // Needed because the TWEAK above results in weights from -3.2 to 1.8
            if (weightRef > 1.0f)
            {
                weightRef = 1.0f;
            }
            if (weightRef < 0.0f)
            {
                weightRef = 0.0f;
            }
        }

        // The code below also is only needed because of the TWEAK above, which zeroes
        // a total of 64% of the weights.

        // Scan forward for the first non-zero weight. The bound is checked before the
        // element is read so the scan never looks past the compared samples.
        uint32_t firstNonzero{0};
        while (firstNonzero < compareSamples && weights[firstNonzero] == 0)
        {
            firstNonzero++;
        }
        m_firstNonzeroWeights[octave] = firstNonzero;

        // Scan backward for the last non-zero weight. The index itself is the guard so that
        // an all-zero row (small compareSamples) stops at 0 instead of wrapping around.
        uint32_t lastNonzero = compareSamples > 0 ? compareSamples - 1 : 0;
        while (lastNonzero > 0 && weights[lastNonzero] == 0)
        {
            lastNonzero--;
        }
        m_lastNonzeroWeights[octave] = lastNonzero;
    }
}
/**
 * Searches coarse-to-fine for the shift of the new waveform that best matches the stored one.
 *
 * @param newWaveformMips Mip chain of the new waveform, one buffer per octave.
 * @return The offset with the lowest weighted absolute error found at octave 0.
 */
int WaveformAligner::CalculateOffset(std::vector<WaveformBuffer>& newWaveformMips)
{
    // Signed variables throughout: candidate bounds can go negative before they are clamped,
    // and -1 doubles as the "no candidate yet" marker.
    int alignOffset{};
    int searchBegin{};
    int searchEnd{static_cast<int>(m_octaveSampleSpacing[m_octaves - 1])};

    // Walk from the coarsest octave down to octave 0. The counter is signed because the
    // loop ends once it drops below zero.
    for (int level = static_cast<int>(m_octaves) - 1; level >= 0; level--)
    {
        const auto& shifted = newWaveformMips[level];
        const auto& reference = m_oldWaveformMips[level];
        const auto& weights = m_aligmentWeights[level];
        const auto lastWeight = m_lastNonzeroWeights[level];

        int bestOffset{-1};
        float bestError{};

        // Try every candidate shift in the current window and keep the one with the
        // smallest weighted error. Only the new waveform is shifted; the old one stays put.
        for (int shift = searchBegin; shift < searchEnd; shift++)
        {
            float error{};
            for (uint32_t i = m_firstNonzeroWeights[level]; i <= lastWeight; i++)
            {
                error += std::abs((shifted[i + shift] - reference[i]) * weights[i]);
            }

            if (bestOffset == -1 || error < bestError)
            {
                bestOffset = shift;
                bestError = error;
            }
        }

        if (level == 0)
        {
            // Finest octave reached: this is the final answer.
            alignOffset = bestOffset;
            continue;
        }

        // Narrow the window for the next finer octave. An offset k here maps to samples
        // 2k and 2k+1 there; the window is widened by one sample on each side, e.g.:
        //  m_octaveSampleSpacing[level] == 8, bestOffset == 2
        //  -> next octave tests offsets 3..6 (clamped to its own spacing).
        //
        // Clamping the lower bound at zero is what keeps negative offsets from being
        // tested, which matches the original milkdrop behavior.
        searchBegin = std::max(bestOffset * 2 - 1, 0);
        searchEnd = std::min(bestOffset * 2 + 3, static_cast<int>(m_octaveSampleSpacing[level - 1]));
    }

    return alignOffset;
}
/**
 * Shifts the given waveform in place so that it lines up with the previous frame's waveform.
 *
 * @param newWaveform The waveform to align. On a positive offset, its first WaveformSamples
 *                    entries are replaced by the shifted samples and the remainder is zeroed.
 */
void WaveformAligner::Align(WaveformBuffer& newWaveform)
{
    if (m_octaves < 4)
    {
        // The original code does not align if there isn't enough margin for
        // alignment but has no explanation for why the limit is 2**4 samples.
        return;
    }

    std::vector<WaveformBuffer> newWaveformMips(m_octaves, WaveformBuffer());
    ResampleOctaves(newWaveformMips, newWaveform);

    if (!m_alignWaveReady)
    {
        GenerateWeights();

        // Mark that weights have been calculated.
        m_alignWaveReady = true;
    }

    const int alignOffset = CalculateOffset(newWaveformMips);

    // Finally, apply the results by scooting the aligned samples so that they start at index 0.
    // This is the second place where we limit negative offsets.
    if (alignOffset > 0)
    {
        // Source and destination overlap. std::copy is specified to copy front to back and
        // is valid here because the destination start lies before the source range.
        const auto shiftedBegin = newWaveform.begin() + alignOffset;
        std::copy(shiftedBegin, shiftedBegin + WaveformSamples, newWaveform.begin());

        // Set remaining samples to zero.
        std::fill_n(newWaveform.begin() + WaveformSamples, AudioBufferSamples - WaveformSamples, 0.0f);
    }

    // Store mip levels for the next frame. Note that we need to recalculate the mips for the *shifted*
    // waveform, so we can't reuse the previous mips.
    ResampleOctaves(m_oldWaveformMips, newWaveform);
}
} // namespace Audio
} // namespace libprojectM