#include "WaveformAligner.hpp"

#include <algorithm>
#include <cmath>
#include <cstdint>

namespace libprojectM {
namespace Audio {

/**
 * Sets up the per-octave bookkeeping used by the aligner.
 *
 * The number of octaves is floor(log2(AudioBufferSamples - WaveformSamples)), capped at 10.
 * Octave 0 works on the full-resolution buffer; every further octave halves both the sample
 * count and the search margin ("sample spacing") of the previous one.
 */
WaveformAligner::WaveformAligner()
{
    static const uint32_t maxOctaves{10};

    // Number of spare samples available for shifting the waveform. Guard against a
    // non-positive margin so the octave computation below stays well-defined.
    const uint32_t alignmentMargin = AudioBufferSamples > WaveformSamples
                                         ? static_cast<uint32_t>(AudioBufferSamples - WaveformSamples)
                                         : 0u;

    // Integer floor(log2(alignmentMargin)).
    // For AudioBufferSamples = 576 and WaveformSamples = 480:
    // floor(log2(96)) = 6
    // Done with shifts instead of log(x) / log(2.0f): the floating point quotient can land
    // just below an integer for exact powers of two, which would lose one octave.
    uint32_t numOctaves{};
    for (uint32_t remaining = alignmentMargin; remaining > 1; remaining >>= 1)
    {
        numOctaves++;
    }

    m_octaves = numOctaves > maxOctaves ? maxOctaves : numOctaves;

    m_aligmentWeights.resize(m_octaves);
    m_firstNonzeroWeights.resize(m_octaves);
    m_lastNonzeroWeights.resize(m_octaves);
    m_octaveSamples.resize(m_octaves);
    m_octaveSampleSpacing.resize(m_octaves);
    m_oldWaveformMips.resize(m_octaves);

    if (m_octaves == 0)
    {
        // Nothing to index into. Align() bails out early for fewer than 4 octaves anyway.
        return;
    }

    m_octaveSamples[0] = AudioBufferSamples;
    m_octaveSampleSpacing[0] = alignmentMargin;
    for (uint32_t octave = 1; octave < m_octaves; octave++)
    {
        m_octaveSamples[octave] = m_octaveSamples[octave - 1] / 2;
        m_octaveSampleSpacing[octave] = m_octaveSampleSpacing[octave - 1] / 2;
    }
}

/**
 * Builds the mip chain (one buffer per octave) for a waveform.
 *
 * @param dstWaveformMips Receives the mip levels; must hold at least m_octaves buffers.
 * @param newWaveform The full-resolution waveform copied into level 0.
 */
void WaveformAligner::ResampleOctaves(std::vector<WaveformBuffer>& dstWaveformMips, WaveformBuffer& newWaveform)
{
    // Octave 0 is a direct copy of the new waveform
    std::copy(newWaveform.begin(), newWaveform.end(), dstWaveformMips[0].begin());

    // Calculate mip levels
    // This downsamples the previous octave's waveform by a factor of 2
    for (uint32_t octave = 1; octave < m_octaves; octave++)
    {
        for (uint32_t sample = 0; sample < m_octaveSamples[octave]; sample++)
        {
            dstWaveformMips[octave][sample] = 0.5f * (dstWaveformMips[octave - 1][sample * 2] + dstWaveformMips[octave - 1][sample * 2 + 1]);
        }
    }
}

/**
 * Computes the per-octave comparison weights and the index range of the non-zero weights.
 *
 * Align() calls this once, before the first offset calculation.
 */
void WaveformAligner::GenerateWeights()
{
    // The below is performed only on the first fill.
    for (uint32_t octave = 0; octave < m_octaves; octave++)
    {
        // For example:
        //  m_octaveSampleSpacing[octave] == 4
        //  m_octaveSamples[octave] == 36
        //  (so we test 32 samples, w/4 offsets)
        uint32_t const compareSamples = m_octaveSamples[octave] - m_octaveSampleSpacing[octave];

        for (uint32_t sample = 0; sample < compareSamples; sample++)
        {
            // Take a reference to the alignment weights and set them with the computation
            // below.
            auto& weightRef = m_aligmentWeights[octave][sample];

            // Start with pyramid-shaped PDF, from 0..1..0
            if (sample < compareSamples / 2)
            {
                weightRef = static_cast<float>(sample * 2) / static_cast<float>(compareSamples);
            }
            else
            {
                weightRef = static_cast<float>((compareSamples - 1 - sample) * 2) / static_cast<float>(compareSamples);
            }

            /*
             * TWEAK how much the center matters, vs. the edges:
             *
             *   weight[i] = 5.0*((2*i/compareSamples) - 0.8) + 0.8
             *
             * Solving for weight[i] == 0 we get:
             *
             *   2*i/compareSamples = -0.8/5 + 0.8
             *   i = 0.32*compareSamples
             *
             * The weight distribution is symmetric so the falling side gives:
             *
             *   i = 0.68*compareSamples
             */
            weightRef = (weightRef - 0.8f) * 5.0f + 0.8f;

            // Clamp
            // Needed because the TWEAK above results in weights from -3.2 to 1.8
            if (weightRef > 1.0f)
            {
                weightRef = 1.0f;
            }
            if (weightRef < 0.0f)
            {
                weightRef = 0.0f;
            }
        }

        // The code below also is only needed because of the TWEAK above, which zeroes
        // a total of 64% of the weights.

        // Scan forward to the first non-zero weight. The bounds check comes first so the
        // weight is never read past the compared range.
        uint32_t sample{};
        while (sample < compareSamples && m_aligmentWeights[octave][sample] == 0.0f)
        {
            sample++;
        }
        m_firstNonzeroWeights[octave] = sample;

        // Scan backward to the last non-zero weight. Stopping at index 0 keeps the unsigned
        // index from wrapping around if every weight happens to be zero.
        sample = compareSamples > 0 ? compareSamples - 1 : 0;
        while (sample > 0 && m_aligmentWeights[octave][sample] == 0.0f)
        {
            sample--;
        }
        m_lastNonzeroWeights[octave] = sample;
    }
}

/**
 * Finds the shift of the new waveform that best matches the previous frame's waveform.
 *
 * The search runs coarse-to-fine: the best offset found in one octave narrows the range
 * that is tested in the next finer octave.
 *
 * Indexes m_octaves - 1, so it requires m_octaves >= 1; Align() only calls it with at
 * least 4 octaves.
 *
 * @param newWaveformMips Mip chain of the new waveform, as produced by ResampleOctaves().
 * @return The offset with the lowest weighted error in octave 0, or -1 if no offset was tested there.
 */
int WaveformAligner::CalculateOffset(std::vector<WaveformBuffer>& newWaveformMips)
{
    /*
     * Note that we use signed variables here because we need to check for negatives even
     * if we clamp to only positive values and 0.
     */
    int alignOffset{};
    int offsetStart{};
    int offsetEnd{static_cast<int>(m_octaveSampleSpacing[m_octaves - 1])};

    // Find best match for alignment
    // Note that we need a signed iterator here because the loop terminates once octave < 0
    for (int octave = static_cast<int>(m_octaves) - 1; octave >= 0; octave--)
    {
        int lowestErrorOffset{-1};
        float lowestErrorAmount{};

        // For each octave, find the offset that maximizes the correlation between waveforms.
        for (int sample = offsetStart; sample < offsetEnd; sample++)
        {
            float errorSum{};

            // Perform the cross-correlation. Note that we shift the new waveform but not the old
            // one because we're looking for the offset between them that produces the lowest error.
            for (uint32_t i = m_firstNonzeroWeights[octave]; i <= m_lastNonzeroWeights[octave]; i++)
            {
                errorSum += std::abs((newWaveformMips[octave][i + sample] - m_oldWaveformMips[octave][i]) * m_aligmentWeights[octave][i]);
            }

            if (lowestErrorOffset == -1 || errorSum < lowestErrorAmount)
            {
                lowestErrorOffset = sample;
                lowestErrorAmount = errorSum;
            }
        }

        // Now use 'lowestErrorOffset' to guide bounds of search in next octave:
        //  m_octaveSampleSpacing[octave] == 8
        //  m_octaveSamples[octave] == 72
        //     -say 'lowestErrorOffset' was 2
        //     -that corresponds to samples 4 & 5 of the next octave
        //     -also, expand about this by 2 samples?  YES.
        //  (so we'd test 64 samples, w/8->4 offsets)
        if (octave > 0)
        {
            offsetStart = lowestErrorOffset * 2 - 1;
            offsetEnd = lowestErrorOffset * 2 + 2 + 1;
            if (offsetStart < 0)
            {
                /*
                 * This line is what prevents us from checking negative offsets.
                 * There should be no impact to allowing offsetStart to be negative as long as
                 * its magnitude is less than m_firstNonzeroWeights[octave-1]. However, this
                 * is what the original milkdrop code does so we stick with that behavior.
                 */
                offsetStart = 0;
            }
            if (offsetEnd > static_cast<int>(m_octaveSampleSpacing[octave - 1]))
            {
                offsetEnd = static_cast<int>(m_octaveSampleSpacing[octave - 1]);
            }
        }
        else
        {
            alignOffset = lowestErrorOffset;
        }
    }

    return alignOffset;
}

/**
 * Shifts the given waveform in place so it lines up with the waveform of the previous call.
 *
 * Does nothing if fewer than 4 octaves are available. Otherwise the samples are moved to
 * the front by the calculated offset, the tail beyond WaveformSamples is zeroed, and the
 * mip chain of the shifted waveform is stored for the next call.
 *
 * @param newWaveform The waveform buffer to align; modified in place.
 */
void WaveformAligner::Align(WaveformBuffer& newWaveform)
{
    if (m_octaves < 4)
    {
        // The original code does not align if there isn't enough margin for
        // alignment but has no explanation for why the limit is 2**4 samples.
        return;
    }

    std::vector<WaveformBuffer> newWaveformMips(m_octaves, WaveformBuffer());
    ResampleOctaves(newWaveformMips, newWaveform);

    if (!m_alignWaveReady)
    {
        GenerateWeights();

        // Mark that weights have been calculated.
        m_alignWaveReady = true;
    }

    int alignOffset = CalculateOffset(newWaveformMips);

    // Finally, apply the results by scooting the aligned samples so that they start at index 0.
    // This is the second place where we limit negative offsets.
    if (alignOffset > 0)
    {
        std::copy_n(newWaveform.begin() + alignOffset, WaveformSamples, newWaveform.begin());

        // Set remaining samples to zero.
        std::fill_n(newWaveform.begin() + WaveformSamples, AudioBufferSamples - WaveformSamples, 0.0f);
    }

    // Store mip levels for the next frame. Note that we need to recalculate the mips for the *shifted*
    // waveform, so we can't reuse the previous mips.
    ResampleOctaves(m_oldWaveformMips, newWaveform);
}

} // namespace Audio
} // namespace libprojectM